[S_API FAIL] SteamAPI_Init() failed; SteamAPI_IsSteamRunning() failed. [S_API FAIL] SteamAPI_Init() failed; unable to locate a running instance of Steam, or a local steamclient.so. GUID Assets\dlc\dlc_01\mongol.civ5pkg 7a036b7fb9a80e8dea7b73fb58c5a288 GUID Assets\dlc\dlc_02\spaininca.civ5pkg 4f75e72761f4c6019b55a0a7b90444a2 GUID Assets\dlc\dlc_03\polynesia.civ5pkg 99ac9d5f6ca4b5bed0ab89c0fd3b9e6d GUID Assets\dlc\dlc_04\denmark.civ5pkg 0efb155307bd6d14c9290b49b5364a3e GUID Assets\dlc\dlc_05\korea.civ5pkg 9f4df81cf712ae9480737f816bf6f4c8 GUID Assets\dlc\dlc_06\ancientwonders.civ5pkg 92b102db9a3c7dc068030c3ce33bbb48 GUID Assets\dlc\dlc_07\civcomplete.civ5pkg eb01a0be4d8e5312f53b042c8a7c30b5 GUID Assets\dlc\dlc_deluxe\babylon.civ5pkg 712495341921f2b288746c6d44fd6867 GUID Assets\dlc\dlc_sp_maps\dlc_sp_maps.civ5pkg 52b285c37939913e0a5b72933bb06067 GUID Assets\dlc\dlc_sp_maps_2\dlc_sp_maps_2.civ5pkg 16a61e7a2a7bb4bc2d1f677b5bb58ff4 GUID Assets\dlc\dlc_sp_maps_3\dlc_sp_maps_3.civ5pkg 1954db58e0a60b018969c49440fa01ef GUID Assets\dlc\expansion\expansion1.civ5pkg 8bc30c58378345cb0911c5848926f1ff GUID Assets\dlc\expansion2\expansion2.civ5pkg 31dfaa9838c5b051d4c2112ddd9e7eb3 GUID Assets\dlc\shared\upgrade1.civ5pkg e818fa28902977b42ee5e3426f5112e6 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_INTERP_MOV_F32 v1, P0, 1, 0, [m0] ; C8060102 V_INTERP_MOV_F32 v2, P0, 0, 0, [m0] ; C80A0002 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_INTERP_MOV_F32 v1, P0, 1, 0, [m0] ; C8060102 V_INTERP_MOV_F32 v2, P0, 0, 0, [m0] ; C80A0002 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MOV TEMP[0].x, IN[0].xxxx 2: MOV TEMP[0].y, IN[0].yyyy 3: MOV TEMP[1].xy, IN[1].xyxx 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v5, v4 ; F80008CF 04050100 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %35, float %36, float %37, float %38) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) %158 = extractelement <4 x float> %157, i32 0 %159 = extractelement <4 x float> %157, i32 1 %160 = extractelement <4 x float> %157, i32 2 br label %ENDIF ELSE: ; preds = %main_body %161 = bitcast float %42 to i32 %162 = bitcast float %43 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = bitcast <8 x i32> %35 to <32 x i8> %166 = bitcast <4 x i32> %37 to <16 x i8> %167 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %165, <16 x i8> %166, i32 2) %168 = extractelement <4 x float> %167, i32 0 %169 = extractelement <4 x float> %167, i32 1 %170 = extractelement <4 x float> %167, i32 2 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %167, %ELSE ], [ %157, %IF ] %temp2.0 = phi float [ %160, %IF ], [ %170, %ELSE ] %temp1.0 = phi float [ %159, %IF ], [ %169, %ELSE ] %temp.0 = phi float [ %158, %IF ], [ %168, %ELSE ] %171 = extractelement <4 x float> %.sink, i32 3 %172 = fmul float %temp.0, %38 %173 = fmul float %temp1.0, %39 %174 = fmul float %temp2.0, %40 %175 = fmul float %171, %41 %176 = call i32 @llvm.SI.packf16(float %172, float %173) %177 = bitcast i32 %176 to float %178 = call i32 @llvm.SI.packf16(float %174, float %175) %179 = bitcast i32 %178 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %177, float %179, float %177, float %179) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v7, v0, 1, 1, [m0] ; C81C0500 V_INTERP_P2_F32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s20, s[0:3], 0x0 ; C20A0100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[22:23], s20, 5.000000e-01, 0, 0 ; D0080016 0001E014 V_CNDMASK_B32_e64 v0, 0, -1, s[22:23], 0, 0, 0, 0 ; D2000000 00598280 V_CMP_EQ_I32_e64 s[22:23], v0, 0, 0, 0 ; D1040016 00010100 S_AND_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962416 S_XOR_B64 s[22:23], exec, s[22:23] ; 8996167E IMAGE_SAMPLE v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800F00 00430806 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962516 S_XOR_B64 exec, exec, s[22:23] ; 89FE167E S_CBRANCH_EXECZ BB0_4 ; BF880000 S_BUFFER_LOAD_DWORD s21, s[0:3], 0xd ; C20A810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_BUFFER_LOAD_DWORD s25, s[0:3], 0x9 ; C20C8109 S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_BUFFER_LOAD_DWORD s27, s[0:3], 0x4 ; C20D8104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s21 ; 7E000215 V_MOV_B32_e32 v1, s24 ; 7E020218 V_MOV_B32_e32 v8, s25 ; 7E100219 V_MOV_B32_e32 v9, s26 ; 7E12021A V_MOV_B32_e32 v10, s27 ; 7E14021B V_MOV_B32_e32 v11, s20 ; 7E160214 V_MUL_F32_e32 v12, v7, v8 ; 10181107 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v8, v8 ; 7E105508 V_MUL_F32_e32 v15, v14, v8 ; 101E110E V_MUL_F32_e32 v6, v6, v9 ; 100C1306 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v7, v6, v7 ; 080E0F06 V_ADD_F32_e32 v8, 5.000000e-01, v7 ; 06100EF0 V_RCP_F32_e32 v9, v9 ; 7E125509 V_MUL_F32_e32 v14, v8, v9 ; 101C1308 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[8:9], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7080E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v9, v18, 5.000000e-01, 0, 0 ; D2820013 03C22509 V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v8, v18, v19, 0, 0 ; D2820012 044E2508 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v8, v10, v8, 5.000000e-01, 0, 0 ; D2820008 03C2110A V_FRACT_F32_e32 v9, v8 ; 7E124108 V_SUB_F32_e32 v8, v8, v9 ; 08101308 V_ADD_F32_e32 v8, 5.000000e-01, v8 ; 061010F0 V_SUB_F32_e32 v9, 1.024000e+03, v10 ; 081214FF 44800000 V_CMP_GE_F32_e64 s[24:25], v9, 0.000000e+00, 0, 0 ; D00C0018 00010109 V_CNDMASK_B32_e64 v8, v18, v8, s[24:25], 0, 0, 0, 0 ; D2000008 00621112 V_RCP_F32_e32 v9, v11 ; 7E12550B V_MAD_F32 v9, v1, v9, 5.000000e-01, 0, 0 ; D2820009 03C21301 V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_MUL_F32_e32 v10, v9, v8 ; 10141109 V_CMP_GE_F32_e64 s[24:25], v10, 0.000000e+00, 0, 0 ; D00C0018 0001010A V_MOV_B32_e32 v10, 0x80000000 ; 7E1402FF 80000000 V_XOR_B32_e32 v10, v9, v10 ; 3A141509 V_CNDMASK_B32_e64 v10, v10, v9, s[24:25], 0, 0, 0, 0 ; D200000A 0062130A V_RCP_F32_e32 v18, v10 ; 7E24550A V_MUL_F32_e32 v18, v18, v8 ; 10241112 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v10, v18, v10 ; 10141512 V_FRACT_F32_e32 v18, v10 ; 7E24410A V_SUB_F32_e32 v10, v10, v18 ; 0814250A V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, v6, v10 ; 060C1506 V_MUL_F32_e32 v6, v6, v11 ; 100C1706 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, 5.000000e-01, v6 ; 060C0CF0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v14, v6, v1 ; 101C0306 V_RCP_F32_e32 v1, v9 ; 7E025509 V_MUL_F32_e32 v1, v1, v8 ; 10021101 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_SUB_F32_e32 v6, v12, v13 ; 080C1B0C V_ADD_F32_e32 v1, v6, v1 ; 06020306 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v15, v1, v0 ; 101E0101 IMAGE_SAMPLE_L v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 0043080E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[22:23] ; 88FE167E V_MUL_F32_e32 v0, v9, v4 ; 10000909 V_MUL_F32_e32 v1, v8, v5 ; 10020B08 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v10, v3 ; 1002070A V_MUL_F32_e32 v2, v11, v2 ; 1004050B V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) %158 = extractelement <4 x float> %157, i32 0 %159 = extractelement <4 x float> %157, i32 1 %160 = extractelement <4 x float> %157, i32 2 br label %ENDIF ELSE: ; preds = %main_body %161 = bitcast float %42 to i32 %162 = bitcast float %43 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = bitcast <8 x i32> %35 to <32 x i8> %166 = bitcast <4 x i32> %37 to <16 x i8> %167 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %165, <16 x i8> %166, i32 2) %168 = extractelement <4 x float> %167, i32 0 %169 = extractelement <4 x float> %167, i32 1 %170 = extractelement <4 x float> %167, i32 2 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %167, %ELSE ], [ %157, %IF ] %temp2.0 = phi float [ %160, %IF ], [ %170, %ELSE ] %temp1.0 = phi float [ %159, %IF ], [ %169, %ELSE ] %temp.0 = phi float [ %158, %IF ], [ %168, %ELSE ] %171 = extractelement <4 x float> %.sink, i32 3 %172 = fmul float %temp.0, %38 %173 = fmul float %temp1.0, %39 %174 = fmul float %temp2.0, %40 %175 = fmul float %171, %41 %176 = call i32 @llvm.SI.packf16(float %172, float %173) %177 = bitcast i32 %176 to float %178 = call i32 @llvm.SI.packf16(float %174, float %175) %179 = bitcast i32 %178 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %177, float %179, float %177, float %179) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v7, v0, 1, 1, [m0] ; C81C0500 V_INTERP_P2_F32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s20, s[0:3], 0x0 ; C20A0100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[22:23], s20, 5.000000e-01, 0, 0 ; D0080016 0001E014 V_CNDMASK_B32_e64 v0, 0, -1, s[22:23], 0, 0, 0, 0 ; D2000000 00598280 V_CMP_EQ_I32_e64 s[22:23], v0, 0, 0, 0 ; D1040016 00010100 S_AND_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962416 S_XOR_B64 s[22:23], exec, s[22:23] ; 8996167E IMAGE_SAMPLE v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800F00 00430806 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962516 S_XOR_B64 exec, exec, s[22:23] ; 89FE167E S_CBRANCH_EXECZ BB0_4 ; BF880000 S_BUFFER_LOAD_DWORD s21, s[0:3], 0xd ; C20A810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_BUFFER_LOAD_DWORD s25, s[0:3], 0x9 ; C20C8109 S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_BUFFER_LOAD_DWORD s27, s[0:3], 0x4 ; C20D8104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s21 ; 7E000215 V_MOV_B32_e32 v1, s24 ; 7E020218 V_MOV_B32_e32 v8, s25 ; 7E100219 V_MOV_B32_e32 v9, s26 ; 7E12021A V_MOV_B32_e32 v10, s27 ; 7E14021B V_MOV_B32_e32 v11, s20 ; 7E160214 V_MUL_F32_e32 v12, v7, v8 ; 10181107 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v8, v8 ; 7E105508 V_MUL_F32_e32 v15, v14, v8 ; 101E110E V_MUL_F32_e32 v6, v6, v9 ; 100C1306 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v7, v6, v7 ; 080E0F06 V_ADD_F32_e32 v8, 5.000000e-01, v7 ; 06100EF0 V_RCP_F32_e32 v9, v9 ; 7E125509 V_MUL_F32_e32 v14, v8, v9 ; 101C1308 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[8:9], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7080E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v9, v18, 5.000000e-01, 0, 0 ; D2820013 03C22509 V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v8, v18, v19, 0, 0 ; D2820012 044E2508 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v8, v10, v8, 5.000000e-01, 0, 0 ; D2820008 03C2110A V_FRACT_F32_e32 v9, v8 ; 7E124108 V_SUB_F32_e32 v8, v8, v9 ; 08101308 V_ADD_F32_e32 v8, 5.000000e-01, v8 ; 061010F0 V_SUB_F32_e32 v9, 1.024000e+03, v10 ; 081214FF 44800000 V_CMP_GE_F32_e64 s[24:25], v9, 0.000000e+00, 0, 0 ; D00C0018 00010109 V_CNDMASK_B32_e64 v8, v18, v8, s[24:25], 0, 0, 0, 0 ; D2000008 00621112 V_RCP_F32_e32 v9, v11 ; 7E12550B V_MAD_F32 v9, v1, v9, 5.000000e-01, 0, 0 ; D2820009 03C21301 V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_MUL_F32_e32 v10, v9, v8 ; 10141109 V_CMP_GE_F32_e64 s[24:25], v10, 0.000000e+00, 0, 0 ; D00C0018 0001010A V_MOV_B32_e32 v10, 0x80000000 ; 7E1402FF 80000000 V_XOR_B32_e32 v10, v9, v10 ; 3A141509 V_CNDMASK_B32_e64 v10, v10, v9, s[24:25], 0, 0, 0, 0 ; D200000A 0062130A V_RCP_F32_e32 v18, v10 ; 7E24550A V_MUL_F32_e32 v18, v18, v8 ; 10241112 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v10, v18, v10 ; 10141512 V_FRACT_F32_e32 v18, v10 ; 7E24410A V_SUB_F32_e32 v10, v10, v18 ; 0814250A V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, v6, v10 ; 060C1506 V_MUL_F32_e32 v6, v6, v11 ; 100C1706 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, 5.000000e-01, v6 ; 060C0CF0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v14, v6, v1 ; 101C0306 V_RCP_F32_e32 v1, v9 ; 7E025509 V_MUL_F32_e32 v1, v1, v8 ; 10021101 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_SUB_F32_e32 v6, v12, v13 ; 080C1B0C V_ADD_F32_e32 v1, v6, v1 ; 06020306 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v15, v1, v0 ; 101E0101 IMAGE_SAMPLE_L v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 0043080E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[22:23] ; 88FE167E V_MUL_F32_e32 v0, v9, v4 ; 10000909 V_MUL_F32_e32 v1, v8, v5 ; 10020B08 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v10, v3 ; 1002070A V_MUL_F32_e32 v2, v11, v2 ; 1004050B V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table /media/ssd_plain/sliedes/_steam/steam/SteamApps/common/Sid Meier's Civilization V/steamassets/controller.vdf VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[5] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[4], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: ADD TEMP[1].x, CONST[3].xxxx, CONST[3].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[0], CONST[1], IN[1] 7: MUL TEMP[2].w, TEMP[0].wwww, CONST[2].wwww 8: MOV TEMP[2].w, TEMP[2].wwww 9: MOV TEMP[2].xyz, TEMP[0].xyzx 10: MOV OUT[1], TEMP[2] 11: MOV OUT[0], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %40, %13 %51 = fadd float %50, %15 %52 = fmul float %41, %14 %53 = fadd float %52, %16 %54 = fmul float %53, %28 %55 = fmul float %53, %29 %56 = fmul float %53, %30 %57 = fmul float %53, %31 %58 = fmul float %51, %24 %59 = fadd float %58, %54 %60 = fmul float %51, %25 %61 = fadd float %60, %55 %62 = fmul float %51, %26 %63 = fadd float %62, %56 %64 = fmul float %51, %27 %65 = fadd float %64, %57 %66 = fadd float %59, %32 %67 = fadd float %61, %33 %68 = fadd float %63, %34 %69 = fadd float %65, %35 %70 = fadd float %22, %23 %71 = fmul float %66, %70 %72 = fmul float %67, %70 %73 = fmul float %68, %70 %74 = fmul float %69, %70 %75 = fmul float %17, %46 %76 = fmul float %18, %47 %77 = fmul float %19, %48 %78 = fmul float %20, %49 %79 = fmul float %78, %21 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %71, float %72, float %73, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_BUFFER_LOAD_DWORD s5, s[0:3], 0xd ; C202810D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1c ; C200011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_CVT_PKRTZ_F16_F32_e32 v2, v3, v2 ; 5E040503 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v4, v0, 0, 0, [m0] ; C8100000 V_INTERP_P2_F32 v4, [v4], v1, 0, 0, [m0] ; C8110001 V_CVT_PKRTZ_F16_F32_e32 v0, v4, v3 ; 5E000704 EXP 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5a ; C202015A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x59 ; C2020159 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x58 ; C2020158 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5b ; C202015B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5f ; C202015F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_I32_F32_e32 v5, v1 ; 7E0A1101 V_LSHLREV_B32_e32 v5, 4, v5 ; 340A0A84 V_ADD_I32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC BUFFER_LOAD_DWORD v6, s[0:3] + v6 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000606 V_ADD_I32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[8:11], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010800 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v6, v9, v7, v6, 0, 0 ; D2820006 041A0F09 V_ADD_I32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 V_ADD_I32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 BUFFER_LOAD_DWORD v12, s[0:3] + v12 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000C0C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v8, v12, v7, 0, 0 ; D2820007 041E1908 V_MOV_B32_e32 v8, 1.000000e+00 ; 7E1002F2 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 EXP 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_I32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 V_ADD_I32_e32 v2, 64, v5 ; 4A040AC0 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v6, v2, v1, 0, 0 ; D2820000 04060506 V_ADD_I32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 V_ADD_I32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v7, v2, v1, 0, 0 ; D2820001 04060507 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v1 ; 10040204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x60 ; C2020160 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x61 ; C2028161 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s5 ; 7E060205 V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v2, v2, v3 ; 10040702 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v1 ; 10080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s4, v4, 0, 0 ; D2820004 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 V_MUL_F32_e32 v4, v4, v3 ; 10080704 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v1 ; 100A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v0, s4, v5, 0, 0 ; D2820005 04140900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v3 ; 100A0705 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v1, 0, 0 ; D2820000 04040900 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v3 ; 10000700 EXP 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], IN[2].yyyy 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MAD TEMP[0], IN[2].xxxx, TEMP[1], TEMP[0] 6: MUL TEMP[0], TEMP[0], IN[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %38 = bitcast float %34 to i32 %39 = bitcast float %35 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %45, %37 %50 = fmul float %46, %37 %51 = fmul float %47, %37 %52 = fmul float %48, %37 %53 = bitcast float %34 to i32 %54 = bitcast float %35 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = bitcast <8 x i32> %23 to <32 x i8> %58 = bitcast <4 x i32> %25 to <16 x i8> %59 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %57, <16 x i8> %58, i32 2) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %36, %60 %65 = fadd float %64, %49 %66 = fmul float %36, %61 %67 = fadd float %66, %50 %68 = fmul float %36, %62 %69 = fadd float %68, %51 %70 = fmul float %36, %63 %71 = fadd float %70, %52 %72 = fmul float %65, %30 %73 = fmul float %67, %31 %74 = fmul float %69, %32 %75 = fmul float %71, %33 %76 = call i32 @llvm.SI.packf16(float %72, float %73) %77 = bitcast i32 %76 to float %78 = call i32 @llvm.SI.packf16(float %74, float %75) %79 = bitcast i32 %78 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %77, float %79, float %77, float %79) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 1, [m0] ; C80C0500 V_INTERP_P2_F32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 V_INTERP_P1_F32 v8, v0, 1, 2, [m0] ; C8200900 V_INTERP_P2_F32 v8, [v8], v1, 1, 2, [m0] ; C8210901 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v9, v7, v8 ; 10121107 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030A02 V_INTERP_P1_F32 v2, v0, 0, 2, [m0] ; C8080800 V_INTERP_P2_F32 v2, [v2], v1, 0, 2, [m0] ; C8090801 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v3, v2, v13, v9, 0, 0 ; D2820003 04261B02 V_INTERP_P1_F32 v9, v0, 3, 0, [m0] ; C8240300 V_INTERP_P2_F32 v9, [v9], v1, 3, 0, [m0] ; C8250301 V_MUL_F32_e32 v3, v3, v9 ; 10061303 V_MUL_F32_e32 v9, v6, v8 ; 10121106 V_MAD_F32 v9, v2, v12, v9, 0, 0 ; D2820009 04261902 V_INTERP_P1_F32 v14, v0, 2, 0, [m0] ; C8380200 V_INTERP_P2_F32 v14, [v14], v1, 2, 0, [m0] ; C8390201 V_MUL_F32_e32 v9, v9, v14 ; 10121D09 V_CVT_PKRTZ_F16_F32_e32 v3, v9, v3 ; 5E060709 V_MUL_F32_e32 v9, v5, v8 ; 10121105 V_MAD_F32 v9, v2, v11, v9, 0, 0 ; D2820009 04261702 V_INTERP_P1_F32 v14, v0, 1, 0, [m0] ; C8380100 V_INTERP_P2_F32 v14, [v14], v1, 1, 0, [m0] ; C8390101 V_MUL_F32_e32 v9, v9, v14 ; 10121D09 V_MUL_F32_e32 v4, v4, v8 ; 10081104 V_MAD_F32 v2, v2, v10, v4, 0, 0 ; D2820002 04121502 V_INTERP_P1_F32 v4, v0, 0, 0, [m0] ; C8100000 V_INTERP_P2_F32 v4, [v4], v1, 0, 0, [m0] ; C8110001 V_MUL_F32_e32 v0, v2, v4 ; 10000902 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v9 ; 5E001300 EXP 15, 0, 1, 1, 1, v0, v3, v0, v3 ; F8001C0F 03000300 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v4 ; 100A0804 EXP 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_ADD_F32_e32 v4, s4, v0 ; 06080004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v1 ; 06000204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x19 ; C2028119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..4] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0].y, IMM[0].xxxx 1: ADD TEMP[1].x, IMM[0].yyyy, -CONST[2].xxxx 2: MOV TEMP[0].x, TEMP[1].xxxx 3: FSNE TEMP[2].x, CONST[0].xxxx, -CONST[0].xxxx 4: UIF TEMP[2].xxxx :0 5: FSLT TEMP[2].x, IMM[0].xxxx, CONST[1].xxxx 6: UIF TEMP[2].xxxx :0 7: MUL TEMP[2].zw, CONST[3].xyxy, IN[1].xyxy 8: MOV TEMP[0].zw, TEMP[2].wwzw 9: FRC TEMP[2].xy, TEMP[2].zwzw 10: ADD TEMP[3].zw, TEMP[0], -TEMP[2].xyxy 11: MOV TEMP[0].zw, TEMP[3].wwzw 12: MAD TEMP[3].xy, IN[1], CONST[3], -TEMP[3].zwzw 13: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 14: RCP TEMP[5].x, CONST[3].xxxx 15: RCP TEMP[6].x, CONST[3].yyyy 16: MOV TEMP[5].y, TEMP[6].xxxx 17: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 18: MOV TEMP[4].xy, TEMP[4].xyyy 19: MOV TEMP[4].w, IMM[0].zzzz 20: TXL TEMP[4], TEMP[4], SAMP[0], 2D 21: MOV TEMP[5].zw, TEMP[4] 22: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 23: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 24: FRC TEMP[7].w, TEMP[6].xxxx 25: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 26: MOV TEMP[0].z, TEMP[6].zzzz 27: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 28: FRC TEMP[6].z, TEMP[4].wwww 29: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 30: MOV TEMP[0].w, TEMP[4].wwww 31: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 32: FSGE TEMP[6].x, TEMP[1].xxxx, IMM[0].zzzz 33: UIF TEMP[6].xxxx :0 34: MOV TEMP[6].x, TEMP[4].wwww 35: ELSE :0 36: MOV TEMP[6].x, TEMP[4].zzzz 37: ENDIF 38: RCP TEMP[4].x, CONST[1].xxxx 39: MAD TEMP[4].w, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 40: FRC TEMP[7].z, TEMP[4].wwww 41: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[7].zzzz 42: MUL TEMP[7].z, TEMP[4].wwww, TEMP[6].xxxx 43: MOV TEMP[8].x, -TEMP[4].wwww 44: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].zzzz 45: UIF TEMP[7].xxxx :0 46: MOV TEMP[7].x, TEMP[4].wwww 47: ELSE :0 48: MOV TEMP[7].x, TEMP[8].xxxx 49: ENDIF 50: MOV TEMP[2].z, TEMP[7].xxxx 51: RCP TEMP[8].x, TEMP[7].xxxx 52: MUL TEMP[8].w, TEMP[6].xxxx, TEMP[8].xxxx 53: FRC TEMP[8].w, TEMP[8].wwww 54: MOV TEMP[2].w, TEMP[8].wwww 55: MUL TEMP[7].x, TEMP[8].wwww, TEMP[7].xxxx 56: MOV TEMP[5].x, TEMP[7].xxxx 57: RCP TEMP[4].x, TEMP[4].wwww 58: MUL TEMP[4].y, TEMP[4].xxxx, TEMP[6].xxxx 59: MOV TEMP[5].y, TEMP[4].yyyy 60: FRC TEMP[4].zw, TEMP[5].xyxy 61: MOV TEMP[0].zw, TEMP[4].wwzw 62: ADD TEMP[4].zw, -TEMP[0], TEMP[5].xyxy 63: MOV TEMP[0].zw, TEMP[4].wwzw 64: ADD TEMP[3].zw, TEMP[3].xyxy, TEMP[0] 65: MOV TEMP[0].zw, TEMP[3].wwzw 66: MUL TEMP[3].zw, TEMP[0], CONST[1].xxxx 67: MOV TEMP[0].zw, TEMP[3].wwzw 68: FRC TEMP[3].xy, TEMP[3].zwzw 69: ADD TEMP[3].zw, TEMP[0], -TEMP[3].xyxy 70: MOV TEMP[0].zw, TEMP[3].wwzw 71: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 72: MOV TEMP[0].w, TEMP[3].wwzw 73: RCP TEMP[2].x, CONST[4].xxxx 74: RCP TEMP[4].x, CONST[4].yyyy 75: MOV TEMP[2].y, TEMP[4].xxxx 76: MUL TEMP[3].xy, TEMP[3].zwzw, TEMP[2] 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: MOV TEMP[3].w, IMM[0].zzzz 79: TXL TEMP[3], TEMP[3], SAMP[1], 2D 80: MOV TEMP[2], TEMP[3] 81: MOV TEMP[0].z, TEMP[3].wwww 82: ELSE :0 83: MOV TEMP[3].xy, IN[1].xyyy 84: TEX TEMP[3], TEMP[3], SAMP[1], 2D 85: MOV TEMP[2], TEMP[3] 86: MOV TEMP[0].z, TEMP[3].wwww 87: ENDIF 88: MUL TEMP[3].w, TEMP[0].zzzz, IN[0].wwww 89: MOV TEMP[3].w, TEMP[3].wwww 90: MOV TEMP[3].xyz, IN[0].xyzx 91: ELSE :0 92: FSLT TEMP[4].x, IMM[0].xxxx, CONST[1].xxxx 93: UIF TEMP[4].xxxx :0 94: MUL TEMP[4].zw, CONST[3].xyxy, IN[1].xyxy 95: MOV TEMP[0].zw, TEMP[4].wwzw 96: FRC TEMP[4].xy, TEMP[4].zwzw 97: MOV TEMP[2].xy, TEMP[4].xyxx 98: ADD TEMP[4].zw, TEMP[0], -TEMP[4].xyxy 99: MOV TEMP[0].zw, TEMP[4].wwzw 100: MAD TEMP[4].xy, IN[1], CONST[3], -TEMP[4].zwzw 101: MOV TEMP[2].xy, TEMP[4].xyxx 102: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 103: RCP TEMP[5].x, CONST[3].xxxx 104: RCP TEMP[6].x, CONST[3].yyyy 105: MOV TEMP[5].y, TEMP[6].xxxx 106: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 107: MOV TEMP[4].xy, TEMP[4].xyyy 108: MOV TEMP[4].w, IMM[0].zzzz 109: TXL TEMP[4], TEMP[4], SAMP[0], 2D 110: MOV TEMP[5].zw, TEMP[4] 111: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 112: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 113: FRC TEMP[7].w, TEMP[6].xxxx 114: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 115: MOV TEMP[0].z, TEMP[6].zzzz 116: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 117: FRC TEMP[6].z, TEMP[4].wwww 118: MOV TEMP[2].z, TEMP[6].zzzz 119: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 120: MOV TEMP[0].w, TEMP[4].wwww 121: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 122: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 123: UIF TEMP[1].xxxx :0 124: MOV TEMP[1].x, TEMP[4].wwww 125: ELSE :0 126: MOV TEMP[1].x, TEMP[4].zzzz 127: ENDIF 128: MOV TEMP[0].x, TEMP[1].xxxx 129: RCP TEMP[4].x, CONST[1].xxxx 130: MAD TEMP[4].y, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 131: FRC TEMP[6].z, TEMP[4].yyyy 132: ADD TEMP[4].y, -TEMP[6].zzzz, TEMP[4].yyyy 133: MUL TEMP[6].z, TEMP[4].yyyy, TEMP[1].xxxx 134: MOV TEMP[7].x, -TEMP[4].yyyy 135: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].zzzz 136: UIF TEMP[6].xxxx :0 137: MOV TEMP[6].x, TEMP[4].yyyy 138: ELSE :0 139: MOV TEMP[6].x, TEMP[7].xxxx 140: ENDIF 141: MOV TEMP[0].z, TEMP[6].xxxx 142: RCP TEMP[6].x, TEMP[6].xxxx 143: MUL TEMP[1].w, TEMP[6].xxxx, TEMP[1].xxxx 144: FRC TEMP[1].w, TEMP[1].wwww 145: MOV TEMP[0].w, TEMP[1].wwww 146: RCP TEMP[1].x, TEMP[4].yyyy 147: MOV TEMP[0].y, TEMP[1].xxxx 148: MUL TEMP[1].xy, TEMP[0].wyzw, TEMP[0].zxzw 149: MOV TEMP[5].xy, TEMP[1].xyxx 150: FRC TEMP[1].xy, TEMP[5] 151: MOV TEMP[0].xy, TEMP[1].xyxx 152: ADD TEMP[1].xy, -TEMP[0], TEMP[5] 153: MOV TEMP[0].xy, TEMP[1].xyxx 154: ADD TEMP[1].xy, TEMP[2], TEMP[0] 155: MOV TEMP[0].xy, TEMP[1].xyxx 156: MUL TEMP[1].xy, TEMP[0], CONST[1].xxxx 157: MOV TEMP[0].xy, TEMP[1].xyxx 158: FRC TEMP[1].zw, TEMP[1].xyxy 159: MOV TEMP[0].zw, TEMP[1].wwzw 160: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[0] 161: MOV TEMP[0].xy, TEMP[1].xyxx 162: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 163: MOV TEMP[0].xy, TEMP[1].xyxx 164: RCP TEMP[2].x, CONST[4].xxxx 165: RCP TEMP[1].x, CONST[4].yyyy 166: MOV TEMP[2].y, TEMP[1].xxxx 167: MUL TEMP[1].xy, TEMP[0], TEMP[2] 168: MOV TEMP[1].xy, TEMP[1].xyyy 169: MOV TEMP[1].w, IMM[0].zzzz 170: TXL TEMP[1], TEMP[1], SAMP[1], 2D 171: MOV TEMP[0].xyz, TEMP[1] 172: MOV TEMP[2].x, TEMP[1].wwww 173: ELSE :0 174: MOV TEMP[1].xy, IN[1].xyyy 175: TEX TEMP[1], TEMP[1], SAMP[1], 2D 176: MOV TEMP[0].xyz, TEMP[1] 177: MOV TEMP[2].x, TEMP[1].wwww 178: ENDIF 179: MUL TEMP[1].w, TEMP[2].xxxx, IN[0].wwww 180: MOV TEMP[0].w, TEMP[1].wwww 181: MOV TEMP[3], TEMP[0] 182: ENDIF 183: MOV OUT[0], TEMP[3] 184: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %31 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %32 = load <8 x i32> addrspace(2)* %31, !tbaa !0 %33 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %34 = load <4 x i32> addrspace(2)* %33, !tbaa !0 %35 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %36 = load <8 x i32> addrspace(2)* %35, !tbaa !0 %37 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %38 = load <4 x i32> addrspace(2)* %37, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = fsub float -0.000000e+00, %26 %46 = fadd float 1.024000e+03, %45 %47 = fsub float -0.000000e+00, %24 %48 = fcmp une float %24, %47 %49 = sext i1 %48 to i32 %50 = bitcast i32 %49 to float %51 = bitcast float %50 to i32 %52 = icmp ne i32 %51, 0 %53 = fcmp olt float 5.000000e-01, %25 %54 = sext i1 %53 to i32 %55 = bitcast i32 %54 to float %56 = bitcast float %55 to i32 %57 = icmp ne i32 %56, 0 br i1 %52, label %IF, label %ELSE IF: ; preds = %main_body br i1 %57, label %IF37, label %ELSE38 ELSE: ; preds = %main_body br i1 %57, label %IF46, label %ELSE47 ENDIF: ; preds = %IF46, %ELSE47, %IF37, %ELSE38 %.sink56.sink = phi <4 x float> [ %172, %ELSE38 ], [ %165, %IF37 ], [ %284, %ELSE47 ], [ %274, %IF46 ] %temp12.0 = phi float [ %39, %ELSE38 ], [ %39, %IF37 ], [ %275, %IF46 ], [ %285, %ELSE47 ] %temp13.0 = phi float [ %40, %ELSE38 ], [ %40, %IF37 ], [ %276, %IF46 ], [ %286, %ELSE47 ] %temp14.0 = phi float [ %41, %ELSE38 ], [ %41, %IF37 ], [ %277, %IF46 ], [ %287, %ELSE47 ] %58 = extractelement <4 x float> %.sink56.sink, i32 3 %59 = fmul float %58, %42 %60 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp13.0) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %temp14.0, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void IF37: ; preds = %IF %64 = fmul float %27, %43 %65 = fmul float %28, %44 %66 = call float @llvm.AMDIL.fraction.(float %64) %67 = call float @llvm.AMDIL.fraction.(float %65) %68 = fsub float -0.000000e+00, %66 %69 = fadd float %64, %68 %70 = fsub float -0.000000e+00, %67 %71 = fadd float %65, %70 %72 = fsub float -0.000000e+00, %69 %73 = fmul float %43, %27 %74 = fadd float %73, %72 %75 = fsub float -0.000000e+00, %71 %76 = fmul float %44, %28 %77 = fadd float %76, %75 %78 = fadd float %69, 5.000000e-01 %79 = fadd float %71, 5.000000e-01 %80 = fdiv float 1.000000e+00, %27 %81 = fdiv float 1.000000e+00, %28 %82 = fmul float %78, %80 %83 = fmul float %79, %81 %84 = bitcast float %82 to i32 %85 = bitcast float %83 to i32 %86 = bitcast float 0.000000e+00 to i32 %87 = insertelement <4 x i32> undef, i32 %84, i32 0 %88 = insertelement <4 x i32> %87, i32 %85, i32 1 %89 = insertelement <4 x i32> %88, i32 %86, i32 2 %90 = insertelement <4 x i32> %89, i32 undef, i32 3 %91 = bitcast <8 x i32> %32 to <32 x i8> %92 = bitcast <4 x i32> %34 to <16 x i8> %93 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 3 %96 = fmul float %95, 6.528000e+04 %97 = fmul float %94, 2.550000e+02 %98 = fadd float %96, %97 %99 = fadd float %98, 5.000000e-01 %100 = call float @llvm.AMDIL.fraction.(float %99) %101 = fsub float -0.000000e+00, %100 %102 = fadd float %101, %99 %103 = fmul float %26, %94 %104 = fadd float %103, 5.000000e-01 %105 = call float @llvm.AMDIL.fraction.(float %104) %106 = fsub float -0.000000e+00, %105 %107 = fadd float %104, %106 %108 = fadd float %102, 5.000000e-01 %109 = fadd float %107, 5.000000e-01 %110 = fcmp oge float %46, 0.000000e+00 %111 = sext i1 %110 to i32 %112 = bitcast i32 %111 to float %113 = bitcast float %112 to i32 %114 = icmp ne i32 %113, 0 %. = select i1 %114, float %109, float %108 %115 = fdiv float 1.000000e+00, %25 %116 = fmul float %29, %115 %117 = fadd float %116, 5.000000e-01 %118 = call float @llvm.AMDIL.fraction.(float %117) %119 = fsub float -0.000000e+00, %118 %120 = fadd float %117, %119 %121 = fmul float %120, %. %122 = fsub float -0.000000e+00, %120 %123 = fcmp oge float %121, 0.000000e+00 %124 = sext i1 %123 to i32 %125 = bitcast i32 %124 to float %126 = bitcast float %125 to i32 %127 = icmp ne i32 %126, 0 %temp28.0 = select i1 %127, float %120, float %122 %128 = fdiv float 1.000000e+00, %temp28.0 %129 = fmul float %., %128 %130 = call float @llvm.AMDIL.fraction.(float %129) %131 = fmul float %130, %temp28.0 %132 = fdiv float 1.000000e+00, %120 %133 = fmul float %132, %. %134 = call float @llvm.AMDIL.fraction.(float %131) %135 = call float @llvm.AMDIL.fraction.(float %133) %136 = fsub float -0.000000e+00, %134 %137 = fadd float %136, %131 %138 = fsub float -0.000000e+00, %135 %139 = fadd float %138, %133 %140 = fadd float %74, %137 %141 = fadd float %77, %139 %142 = fmul float %140, %25 %143 = fmul float %141, %25 %144 = call float @llvm.AMDIL.fraction.(float %142) %145 = call float @llvm.AMDIL.fraction.(float %143) %146 = fsub float -0.000000e+00, %144 %147 = fadd float %142, %146 %148 = fsub float -0.000000e+00, %145 %149 = fadd float %143, %148 %150 = fadd float %147, 5.000000e-01 %151 = fadd float %149, 5.000000e-01 %152 = fdiv float 1.000000e+00, %29 %153 = fdiv float 1.000000e+00, %30 %154 = fmul float %150, %152 %155 = fmul float %151, %153 %156 = bitcast float %154 to i32 %157 = bitcast float %155 to i32 %158 = bitcast float 0.000000e+00 to i32 %159 = insertelement <4 x i32> undef, i32 %156, i32 0 %160 = insertelement <4 x i32> %159, i32 %157, i32 1 %161 = insertelement <4 x i32> %160, i32 %158, i32 2 %162 = insertelement <4 x i32> %161, i32 undef, i32 3 %163 = bitcast <8 x i32> %36 to <32 x i8> %164 = bitcast <4 x i32> %38 to <16 x i8> %165 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) br label %ENDIF ELSE38: ; preds = %IF %166 = bitcast float %43 to i32 %167 = bitcast float %44 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %36 to <32 x i8> %171 = bitcast <4 x i32> %38 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) br label %ENDIF IF46: ; preds = %ELSE %173 = fmul float %27, %43 %174 = fmul float %28, %44 %175 = call float @llvm.AMDIL.fraction.(float %173) %176 = call float @llvm.AMDIL.fraction.(float %174) %177 = fsub float -0.000000e+00, %175 %178 = fadd float %173, %177 %179 = fsub float -0.000000e+00, %176 %180 = fadd float %174, %179 %181 = fsub float -0.000000e+00, %178 %182 = fmul float %43, %27 %183 = fadd float %182, %181 %184 = fsub float -0.000000e+00, %180 %185 = fmul float %44, %28 %186 = fadd float %185, %184 %187 = fadd float %178, 5.000000e-01 %188 = fadd float %180, 5.000000e-01 %189 = fdiv float 1.000000e+00, %27 %190 = fdiv float 1.000000e+00, %28 %191 = fmul float %187, %189 %192 = fmul float %188, %190 %193 = bitcast float %191 to i32 %194 = bitcast float %192 to i32 %195 = bitcast float 0.000000e+00 to i32 %196 = insertelement <4 x i32> undef, i32 %193, i32 0 %197 = insertelement <4 x i32> %196, i32 %194, i32 1 %198 = insertelement <4 x i32> %197, i32 %195, i32 2 %199 = insertelement <4 x i32> %198, i32 undef, i32 3 %200 = bitcast <8 x i32> %32 to <32 x i8> %201 = bitcast <4 x i32> %34 to <16 x i8> %202 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %199, <32 x i8> %200, <16 x i8> %201, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 3 %205 = fmul float %204, 6.528000e+04 %206 = fmul float %203, 2.550000e+02 %207 = fadd float %205, %206 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %210, %208 %212 = fmul float %26, %203 %213 = fadd float %212, 5.000000e-01 %214 = call float @llvm.AMDIL.fraction.(float %213) %215 = fsub float -0.000000e+00, %214 %216 = fadd float %213, %215 %217 = fadd float %211, 5.000000e-01 %218 = fadd float %216, 5.000000e-01 %219 = fcmp oge float %46, 0.000000e+00 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 %.55 = select i1 %223, float %218, float %217 %224 = fdiv float 1.000000e+00, %25 %225 = fmul float %29, %224 %226 = fadd float %225, 5.000000e-01 %227 = call float @llvm.AMDIL.fraction.(float %226) %228 = fsub float -0.000000e+00, %227 %229 = fadd float %228, %226 %230 = fmul float %229, %.55 %231 = fsub float -0.000000e+00, %229 %232 = fcmp oge float %230, 0.000000e+00 %233 = sext i1 %232 to i32 %234 = bitcast i32 %233 to float %235 = bitcast float %234 to i32 %236 = icmp ne i32 %235, 0 %temp24.1 = select i1 %236, float %229, float %231 %237 = fdiv float 1.000000e+00, %temp24.1 %238 = fmul float %237, %.55 %239 = call float @llvm.AMDIL.fraction.(float %238) %240 = fdiv float 1.000000e+00, %229 %241 = fmul float %239, %temp24.1 %242 = fmul float %240, %.55 %243 = call float @llvm.AMDIL.fraction.(float %241) %244 = call float @llvm.AMDIL.fraction.(float %242) %245 = fsub float -0.000000e+00, %243 %246 = fadd float %245, %241 %247 = fsub float -0.000000e+00, %244 %248 = fadd float %247, %242 %249 = fadd float %183, %246 %250 = fadd float %186, %248 %251 = fmul float %249, %25 %252 = fmul float %250, %25 %253 = call float @llvm.AMDIL.fraction.(float %251) %254 = call float @llvm.AMDIL.fraction.(float %252) %255 = fsub float -0.000000e+00, %253 %256 = fadd float %255, %251 %257 = fsub float -0.000000e+00, %254 %258 = fadd float %257, %252 %259 = fadd float %256, 5.000000e-01 %260 = fadd float %258, 5.000000e-01 %261 = fdiv float 1.000000e+00, %29 %262 = fdiv float 1.000000e+00, %30 %263 = fmul float %259, %261 %264 = fmul float %260, %262 %265 = bitcast float %263 to i32 %266 = bitcast float %264 to i32 %267 = bitcast float 0.000000e+00 to i32 %268 = insertelement <4 x i32> undef, i32 %265, i32 0 %269 = insertelement <4 x i32> %268, i32 %266, i32 1 %270 = insertelement <4 x i32> %269, i32 %267, i32 2 %271 = insertelement <4 x i32> %270, i32 undef, i32 3 %272 = bitcast <8 x i32> %36 to <32 x i8> %273 = bitcast <4 x i32> %38 to <16 x i8> %274 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %271, <32 x i8> %272, <16 x i8> %273, i32 2) %275 = extractelement <4 x float> %274, i32 0 %276 = extractelement <4 x float> %274, i32 1 %277 = extractelement <4 x float> %274, i32 2 br label %ENDIF ELSE47: ; preds = %ELSE %278 = bitcast float %43 to i32 %279 = bitcast float %44 to i32 %280 = insertelement <2 x i32> undef, i32 %278, i32 0 %281 = insertelement <2 x i32> %280, i32 %279, i32 1 %282 = bitcast <8 x i32> %36 to <32 x i8> %283 = bitcast <4 x i32> %38 to <16 x i8> %284 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %281, <32 x i8> %282, <16 x i8> %283, i32 2) %285 = extractelement <4 x float> %284, i32 0 %286 = extractelement <4 x float> %284, i32 1 %287 = extractelement <4 x float> %284, i32 2 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v7, v0, 1, 1, [m0] ; C81C0500 V_INTERP_P2_F32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 S_LOAD_DWORDX4 s[28:31], s[2:3], 0x0 ; C08E0300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[28:31], 0x0 ; C2001D00 V_MOV_B32_e32 v0, 0x80000000 ; 7E0002FF 80000000 S_WAITCNT lgkmcnt(0) ; BF8C007F V_XOR_B32_e32 v0, s0, v0 ; 3A000000 V_CMP_NEQ_F32_e32 vcc, s0, v0 ; 7C1A0000 V_CNDMASK_B32_e64 v0, 0, -1, vcc, 0, 0, 0, 0 ; D2000000 01A98280 V_CMP_EQ_I32_e64 s[32:33], v0, 0, 0, 0 ; D1040020 00010100 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_LOAD_DWORDX4 s[16:19], s[4:5], 0x0 ; C0880500 S_LOAD_DWORDX8 s[20:27], s[6:7], 0x0 ; C0CA0700 S_BUFFER_LOAD_DWORD s4, s[28:31], 0x11 ; C2021D11 S_BUFFER_LOAD_DWORD s5, s[28:31], 0x10 ; C2029D10 S_BUFFER_LOAD_DWORD s6, s[28:31], 0xd ; C2031D0D S_BUFFER_LOAD_DWORD s7, s[28:31], 0xc ; C2039D0C S_BUFFER_LOAD_DWORD s34, s[28:31], 0x8 ; C2111D08 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s34 ; 7E000222 V_SUB_F32_e32 v9, 1.024000e+03, v0 ; 081200FF 44800000 S_BUFFER_LOAD_DWORD s28, s[28:31], 0x4 ; C20E1D04 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[30:31], s28, 5.000000e-01, 0, 0 ; D008001E 0001E01C V_CNDMASK_B32_e64 v13, 0, -1, s[30:31], 0, 0, 0, 0 ; D200000D 00798280 V_MOV_B32_e32 v0, s4 ; 7E000204 V_MOV_B32_e32 v8, s5 ; 7E100205 V_MOV_B32_e32 v12, s6 ; 7E180206 V_MOV_B32_e32 v11, s7 ; 7E160207 V_MOV_B32_e32 v10, s34 ; 7E140222 V_MOV_B32_e32 v1, s28 ; 7E02021C S_AND_SAVEEXEC_B64 s[4:5], s[32:33] ; BE842420 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_1 ; BF880000 V_CMP_EQ_I32_e64 s[6:7], v13, 0, 0, 0 ; D1040006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E IMAGE_SAMPLE v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020E06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MOV_B32_e32 v18, v14 ; 7E24030E V_MOV_B32_e32 v19, v15 ; 7E26030F V_MOV_B32_e32 v20, v16 ; 7E280310 V_MOV_B32_e32 v21, v17 ; 7E2A0311 S_OR_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862506 S_XOR_B64 exec, exec, s[6:7] ; 89FE067E S_CBRANCH_EXECZ BB0_4 ; BF880000 V_MUL_F32_e32 v14, v7, v12 ; 101C1907 V_FRACT_F32_e32 v15, v14 ; 7E1E410E V_SUB_F32_e32 v15, v14, v15 ; 081E1F0E V_ADD_F32_e32 v16, 5.000000e-01, v15 ; 06201EF0 V_RCP_F32_e32 v17, v12 ; 7E22550C V_MUL_F32_e32 v17, v16, v17 ; 10222310 V_MUL_F32_e32 v20, v6, v11 ; 10281706 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v21, v20, v21 ; 082A2B14 V_ADD_F32_e32 v22, 5.000000e-01, v21 ; 062C2AF0 V_RCP_F32_e32 v23, v11 ; 7E2E550B V_MUL_F32_e32 v16, v22, v23 ; 10202F16 V_MOV_B32_e32 v18, 0 ; 7E240280 IMAGE_SAMPLE_L v[22:23], 9, 0, 0, 0, 0, 0, 0, 0, v[16:19], s[20:27], s[16:19] ; F0900900 00851610 V_MOV_B32_e32 v24, 2.550000e+02 ; 7E3002FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v24, v22, v24 ; 10303116 V_MOV_B32_e32 v25, 6.528000e+04 ; 7E3202FF 477F0000 V_MAD_F32 v24, v23, v25, v24, 0, 0 ; D2820018 04623317 V_ADD_F32_e32 v24, 5.000000e-01, v24 ; 063030F0 V_FRACT_F32_e32 v25, v24 ; 7E324118 V_SUB_F32_e32 v24, v24, v25 ; 08303318 V_ADD_F32_e32 v24, 5.000000e-01, v24 ; 063030F0 V_MAD_F32 v22, v10, v22, 5.000000e-01, 0, 0 ; D2820016 03C22D0A V_FRACT_F32_e32 v23, v22 ; 7E2E4116 V_SUB_F32_e32 v22, v22, v23 ; 082C2F16 V_ADD_F32_e32 v22, 5.000000e-01, v22 ; 062C2CF0 V_CMP_GE_F32_e64 s[28:29], v9, 0.000000e+00, 0, 0 ; D00C001C 00010109 V_CNDMASK_B32_e64 v22, v24, v22, s[28:29], 0, 0, 0, 0 ; D2000016 00722D18 V_RCP_F32_e32 v23, v1 ; 7E2E5501 V_MAD_F32 v23, v8, v23, 5.000000e-01, 0, 0 ; D2820017 03C22F08 V_FRACT_F32_e32 v24, v23 ; 7E304117 V_SUB_F32_e32 v23, v23, v24 ; 082E3117 V_MUL_F32_e32 v24, v23, v22 ; 10302D17 V_CMP_GE_F32_e64 s[28:29], v24, 0.000000e+00, 0, 0 ; D00C001C 00010118 V_MOV_B32_e32 v24, 0x80000000 ; 7E3002FF 80000000 V_XOR_B32_e32 v24, v23, v24 ; 3A303117 V_CNDMASK_B32_e64 v24, v24, v23, s[28:29], 0, 0, 0, 0 ; D2000018 00722F18 V_RCP_F32_e32 v25, v24 ; 7E325518 V_MUL_F32_e32 v25, v25, v22 ; 10322D19 V_FRACT_F32_e32 v25, v25 ; 7E324119 V_MUL_F32_e32 v24, v25, v24 ; 10303119 V_FRACT_F32_e32 v25, v24 ; 7E324118 V_SUB_F32_e32 v24, v24, v25 ; 08303318 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_ADD_F32_e32 v20, v20, v24 ; 06283114 V_MUL_F32_e32 v20, v20, v1 ; 10280314 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_ADD_F32_e32 v20, 5.000000e-01, v20 ; 062828F0 V_RCP_F32_e32 v21, v8 ; 7E2A5508 V_MUL_F32_e32 v16, v20, v21 ; 10202B14 V_RCP_F32_e32 v20, v23 ; 7E285517 V_MUL_F32_e32 v20, v20, v22 ; 10282D14 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_SUB_F32_e32 v14, v14, v15 ; 081C1F0E V_ADD_F32_e32 v14, v14, v20 ; 061C290E V_MUL_F32_e32 v14, v14, v1 ; 101C030E V_FRACT_F32_e32 v15, v14 ; 7E1E410E V_SUB_F32_e32 v14, v14, v15 ; 081C1F0E V_ADD_F32_e32 v14, 5.000000e-01, v14 ; 061C1CF0 V_RCP_F32_e32 v15, v0 ; 7E1E5500 V_MUL_F32_e32 v17, v14, v15 ; 10221F0E IMAGE_SAMPLE_L v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[16:19], s[8:15], s[0:3] ; F0900F00 00020E10 S_WAITCNT vmcnt(0) ; BF8C0770 V_MOV_B32_e32 v18, v14 ; 7E24030E V_MOV_B32_e32 v19, v15 ; 7E26030F V_MOV_B32_e32 v20, v16 ; 7E280310 V_MOV_B32_e32 v21, v17 ; 7E2A0311 S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E S_CBRANCH_EXECZ BB0_6 ; BF880000 V_CMP_EQ_I32_e64 s[6:7], v13, 0, 0, 0 ; D1040006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E IMAGE_SAMPLE v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00021206 V_MOV_B32_e32 v16, v3 ; 7E200303 V_MOV_B32_e32 v15, v4 ; 7E1E0304 V_MOV_B32_e32 v14, v5 ; 7E1C0305 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862506 S_WAITCNT expcnt(0) ; BF8C070F S_XOR_B64 exec, exec, s[6:7] ; 89FE067E S_CBRANCH_EXECZ BB0_5 ; BF880000 V_MUL_F32_e32 v13, v7, v12 ; 101A1907 V_FRACT_F32_e32 v14, v13 ; 7E1C410D V_SUB_F32_e32 v14, v13, v14 ; 081C1D0D V_ADD_F32_e32 v15, 5.000000e-01, v14 ; 061E1CF0 V_RCP_F32_e32 v12, v12 ; 7E18550C V_MUL_F32_e32 v16, v15, v12 ; 1020190F V_MUL_F32_e32 v6, v6, v11 ; 100C1706 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v7, v6, v7 ; 080E0F06 V_ADD_F32_e32 v12, 5.000000e-01, v7 ; 06180EF0 V_RCP_F32_e32 v11, v11 ; 7E16550B V_MUL_F32_e32 v15, v12, v11 ; 101E170C V_MOV_B32_e32 v17, 0 ; 7E220280 IMAGE_SAMPLE_L v[11:12], 9, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[20:27], s[16:19] ; F0900900 00850B0F V_MOV_B32_e32 v19, 2.550000e+02 ; 7E2602FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v19, v11, v19 ; 1026270B V_MOV_B32_e32 v20, 6.528000e+04 ; 7E2802FF 477F0000 V_MAD_F32 v19, v12, v20, v19, 0, 0 ; D2820013 044E290C V_ADD_F32_e32 v19, 5.000000e-01, v19 ; 062626F0 V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_ADD_F32_e32 v19, 5.000000e-01, v19 ; 062626F0 V_MAD_F32 v10, v10, v11, 5.000000e-01, 0, 0 ; D282000A 03C2170A V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_ADD_F32_e32 v10, 5.000000e-01, v10 ; 061414F0 V_CMP_GE_F32_e64 s[28:29], v9, 0.000000e+00, 0, 0 ; D00C001C 00010109 V_CNDMASK_B32_e64 v9, v19, v10, s[28:29], 0, 0, 0, 0 ; D2000009 00721513 V_RCP_F32_e32 v10, v1 ; 7E145501 V_MAD_F32 v10, v8, v10, 5.000000e-01, 0, 0 ; D282000A 03C21508 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v9 ; 1016130A V_CMP_GE_F32_e64 s[28:29], v11, 0.000000e+00, 0, 0 ; D00C001C 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[28:29], 0, 0, 0, 0 ; D200000B 0072150B V_RCP_F32_e32 v12, v11 ; 7E18550B V_MUL_F32_e32 v12, v9, v12 ; 10181909 V_FRACT_F32_e32 v12, v12 ; 7E18410C V_MUL_F32_e32 v11, v12, v11 ; 1016170C V_FRACT_F32_e32 v12, v11 ; 7E18410B V_SUB_F32_e32 v11, v11, v12 ; 0816190B V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, v6, v11 ; 060C1706 V_MUL_F32_e32 v6, v6, v1 ; 100C0306 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, 5.000000e-01, v6 ; 060C0CF0 V_RCP_F32_e32 v7, v8 ; 7E0E5508 V_MUL_F32_e32 v15, v6, v7 ; 101E0F06 V_RCP_F32_e32 v6, v10 ; 7E0C550A V_MUL_F32_e32 v6, v6, v9 ; 100C1306 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_SUB_F32_e32 v7, v13, v14 ; 080E1D0D V_ADD_F32_e32 v6, v7, v6 ; 060C0D07 V_MUL_F32_e32 v1, v6, v1 ; 10020306 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v16, v1, v0 ; 10200101 IMAGE_SAMPLE_L v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[8:15], s[0:3] ; F0900F00 0002120F V_MOV_B32_e32 v16, v3 ; 7E200303 V_MOV_B32_e32 v15, v4 ; 7E1E0304 V_MOV_B32_e32 v14, v5 ; 7E1C0305 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_PKRTZ_F16_F32_e32 v0, v14, v15 ; 5E001F0E V_MUL_F32_e32 v1, v21, v2 ; 10020515 V_CVT_PKRTZ_F16_F32_e32 v1, v16, v1 ; 5E020310 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5a ; C202015A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x59 ; C2020159 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x58 ; C2020158 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5b ; C202015B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5f ; C202015F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_I32_F32_e32 v5, v1 ; 7E0A1101 V_LSHLREV_B32_e32 v5, 4, v5 ; 340A0A84 V_ADD_I32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC BUFFER_LOAD_DWORD v6, s[0:3] + v6 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000606 V_ADD_I32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[8:11], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010800 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v6, v9, v7, v6, 0, 0 ; D2820006 041A0F09 V_ADD_I32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 V_ADD_I32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 BUFFER_LOAD_DWORD v12, s[0:3] + v12 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000C0C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v8, v12, v7, 0, 0 ; D2820007 041E1908 V_MOV_B32_e32 v8, 1.000000e+00 ; 7E1002F2 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 EXP 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_I32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 V_ADD_I32_e32 v2, 64, v5 ; 4A040AC0 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v6, v2, v1, 0, 0 ; D2820000 04060506 V_ADD_I32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 V_ADD_I32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v7, v2, v1, 0, 0 ; D2820001 04060507 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v1 ; 10040204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x60 ; C2020160 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x61 ; C2028161 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s5 ; 7E060205 V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v2, v2, v3 ; 10040702 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v1 ; 10080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s4, v4, 0, 0 ; D2820004 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 V_MUL_F32_e32 v4, v4, v3 ; 10080704 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v1 ; 100A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v0, s4, v5, 0, 0 ; D2820005 04140900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v3 ; 100A0705 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v1, 0, 0 ; D2820000 04040900 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v3 ; 10000700 EXP 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[4] DCL CONST[0..1] DCL TEMP[0] DCL TEMP[1..5], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[1].xyxx, CONST[1].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3].xyz, CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[1].xy, IN[2].xyyy 33: TEX TEMP[1], TEMP[1], SAMP[1], 2D 34: MUL TEMP[3], TEMP[1], IN[3].yyyy 35: MOV TEMP[1].xy, IN[2].xyyy 36: TEX TEMP[1], TEMP[1], SAMP[0], 2D 37: MAD TEMP[3], IN[3].xxxx, TEMP[1], TEMP[3] 38: MUL TEMP[1], TEMP[3], IN[1] 39: MOV OUT[0], TEMP[1] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %50 = fmul float %15, %32 %51 = fadd float %50, %33 %52 = fmul float %14, %28 %53 = fadd float %52, %30 %54 = fmul float %51, %29 %55 = fadd float %54, %31 %56 = fsub float -0.000000e+00, %24 %57 = fsub float -0.000000e+00, %26 %58 = fadd float %56, %53 %59 = fadd float %56, %53 %60 = fadd float %56, %53 %61 = fcmp olt float %58, 0.000000e+00 %62 = sext i1 %61 to i32 %63 = fcmp olt float %59, 0.000000e+00 %64 = sext i1 %63 to i32 %65 = fcmp olt float %60, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = bitcast i32 %62 to float %68 = bitcast i32 %64 to float %69 = bitcast i32 %66 to float %70 = bitcast float %67 to i32 %71 = bitcast float %69 to i32 %72 = or i32 %70, %71 %73 = bitcast i32 %72 to float %74 = bitcast float %73 to i32 %75 = bitcast float %68 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = icmp ne i32 %78, 0 br i1 %79, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %80 = fsub float -0.000000e+00, %53 %81 = fadd float %25, %80 %82 = fsub float -0.000000e+00, %53 %83 = fadd float %25, %82 %84 = fsub float -0.000000e+00, %53 %85 = fadd float %25, %84 %86 = fcmp olt float %81, 0.000000e+00 %87 = sext i1 %86 to i32 %88 = fcmp olt float %83, 0.000000e+00 %89 = sext i1 %88 to i32 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = bitcast i32 %87 to float %93 = bitcast i32 %89 to float %94 = bitcast i32 %91 to float %95 = bitcast float %92 to i32 %96 = bitcast float %94 to i32 %97 = or i32 %95, %96 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = bitcast float %93 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = icmp ne i32 %103, 0 br i1 %104, label %IF25, label %ENDIF24 IF25: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF24 ENDIF24: ; preds = %ENDIF, %IF25 %105 = fadd float %57, %55 %106 = fadd float %57, %55 %107 = fadd float %57, %55 %108 = fcmp olt float %105, 0.000000e+00 %109 = sext i1 %108 to i32 %110 = fcmp olt float %106, 0.000000e+00 %111 = sext i1 %110 to i32 %112 = fcmp olt float %107, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = bitcast i32 %109 to float %115 = bitcast i32 %111 to float %116 = bitcast i32 %113 to float %117 = bitcast float %114 to i32 %118 = bitcast float %116 to i32 %119 = or i32 %117, %118 %120 = bitcast i32 %119 to float %121 = bitcast float %120 to i32 %122 = bitcast float %115 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = icmp ne i32 %125, 0 br i1 %126, label %IF28, label %ENDIF27 IF28: ; preds = %ENDIF24 call void @llvm.AMDGPU.kilp() br label %ENDIF27 ENDIF27: ; preds = %ENDIF24, %IF28 %127 = fsub float -0.000000e+00, %55 %128 = fadd float %27, %127 %129 = fsub float -0.000000e+00, %55 %130 = fadd float %27, %129 %131 = fsub float -0.000000e+00, %55 %132 = fadd float %27, %131 %133 = fcmp olt float %128, 0.000000e+00 %134 = sext i1 %133 to i32 %135 = fcmp olt float %130, 0.000000e+00 %136 = sext i1 %135 to i32 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = bitcast i32 %134 to float %140 = bitcast i32 %136 to float %141 = bitcast i32 %138 to float %142 = bitcast float %139 to i32 %143 = bitcast float %141 to i32 %144 = or i32 %142, %143 %145 = bitcast i32 %144 to float %146 = bitcast float %145 to i32 %147 = bitcast float %140 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = icmp ne i32 %150, 0 br i1 %151, label %IF31, label %ENDIF30 IF31: ; preds = %ENDIF27 call void @llvm.AMDGPU.kilp() br label %ENDIF30 ENDIF30: ; preds = %ENDIF27, %IF31 %152 = bitcast float %46 to i32 %153 = bitcast float %47 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = bitcast <8 x i32> %39 to <32 x i8> %157 = bitcast <4 x i32> %41 to <16 x i8> %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %156, <16 x i8> %157, i32 2) %159 = extractelement <4 x float> %158, i32 0 %160 = extractelement <4 x float> %158, i32 1 %161 = extractelement <4 x float> %158, i32 2 %162 = extractelement <4 x float> %158, i32 3 %163 = fmul float %159, %49 %164 = fmul float %160, %49 %165 = fmul float %161, %49 %166 = fmul float %162, %49 %167 = bitcast float %46 to i32 %168 = bitcast float %47 to i32 %169 = insertelement <2 x i32> undef, i32 %167, i32 0 %170 = insertelement <2 x i32> %169, i32 %168, i32 1 %171 = bitcast <8 x i32> %35 to <32 x i8> %172 = bitcast <4 x i32> %37 to <16 x i8> %173 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = extractelement <4 x float> %173, i32 3 %178 = fmul float %48, %174 %179 = fadd float %178, %163 %180 = fmul float %48, %175 %181 = fadd float %180, %164 %182 = fmul float %48, %176 %183 = fadd float %182, %165 %184 = fmul float %48, %177 %185 = fadd float %184, %166 %186 = fmul float %179, %42 %187 = fmul float %181, %43 %188 = fmul float %183, %44 %189 = fmul float %185, %45 %190 = call i32 @llvm.SI.packf16(float %186, float %187) %191 = bitcast i32 %190 to float %192 = call i32 @llvm.SI.packf16(float %188, float %189) %193 = bitcast i32 %192 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %191, float %193, float %191, float %193) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v4, v0, 1, 2, [m0] ; C8100900 V_INTERP_P2_F32 v4, [v4], v1, 1, 2, [m0] ; C8110901 V_INTERP_P1_F32 v5, v0, 0, 2, [m0] ; C8140800 V_INTERP_P2_F32 v5, [v5], v1, 0, 2, [m0] ; C8150801 V_INTERP_P1_F32 v7, v0, 1, 1, [m0] ; C81C0500 V_INTERP_P2_F32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_INTERP_P1_F32 v8, v0, 3, 0, [m0] ; C8200300 V_INTERP_P2_F32 v8, [v8], v1, 3, 0, [m0] ; C8210301 V_INTERP_P1_F32 v9, v0, 2, 0, [m0] ; C8240200 V_INTERP_P2_F32 v9, [v9], v1, 2, 0, [m0] ; C8250201 V_INTERP_P1_F32 v10, v0, 1, 0, [m0] ; C8280100 V_INTERP_P2_F32 v10, [v10], v1, 1, 0, [m0] ; C8290101 V_INTERP_P1_F32 v11, v0, 0, 0, [m0] ; C82C0000 V_INTERP_P2_F32 v11, [v11], v1, 0, 0, [m0] ; C82D0001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x4 ; C2040104 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x6 ; C2048106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s9 ; 7E000209 V_MAD_F32 v0, v2, s8, v0, 0, 0 ; D2820000 04001102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s8, v0 ; 0A020008 V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[14:15], v1, 0, 0, 0 ; D10A000E 00010101 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x11 ; C2040111 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x10 ; C2048110 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x7 ; C2050107 S_BUFFER_LOAD_DWORD s11, s[0:3], 0x5 ; C2058105 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x2 ; C2060102 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x1 ; C2068101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v15, s13 ; 7E1E020D S_AND_SAVEEXEC_B64 s[14:15], s[14:15] ; BE8E240E S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s0, s[0:3], 0x3 ; C2000103 V_MOV_B32_e32 v1, s8 ; 7E020208 V_MOV_B32_e32 v12, s9 ; 7E180209 V_MOV_B32_e32 v2, s10 ; 7E04020A V_MOV_B32_e32 v14, s11 ; 7E1C020B V_MOV_B32_e32 v13, s12 ; 7E1A020C S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v0, v15, v0 ; 0800010F V_CMP_LT_F32_e64 s[2:3], v0, 0.000000e+00, 0, 0 ; D0020002 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[2:3], 0, 0, 0, 0 ; D2000000 00098280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[2:3], v0, 0, 0, 0 ; D10A0002 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[2:3], s[2:3] ; BE822402 S_XOR_B64 s[2:3], exec, s[2:3] ; 8982027E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[2:3] ; 88FE027E V_MOV_B32_e32 v0, s0 ; 7E000200 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MAD_F32 v1, v3, v12, v1, 0, 0 ; D2820001 04061903 V_MAD_F32 v1, v1, v14, v2, 0, 0 ; D2820001 040A1D01 V_SUB_F32_e32 v2, v1, v13 ; 08041B01 V_CMP_LT_F32_e64 s[0:1], v2, 0.000000e+00, 0, 0 ; D0020000 00010102 V_CNDMASK_B32_e64 v2, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000002 00018280 V_OR_B32_e32 v2, v2, v2 ; 38040502 V_CMP_NE_I32_e64 s[0:1], v2, 0, 0, 0 ; D10A0000 00010102 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_SUB_F32_e32 v0, v0, v1 ; 08000300 V_CMP_LT_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D0020000 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000000 00018280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[0:1], v0, 0, 0, 0 ; D10A0000 00010100 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020006 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v12, v3, v4 ; 10180903 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010D06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v6, v5, v16, v12, 0, 0 ; D2820006 04322105 V_MUL_F32_e32 v6, v6, v8 ; 100C1106 V_MUL_F32_e32 v7, v2, v4 ; 100E0902 V_MAD_F32 v7, v5, v15, v7, 0, 0 ; D2820007 041E1F05 V_MUL_F32_e32 v7, v7, v9 ; 100E1307 V_CVT_PKRTZ_F16_F32_e32 v6, v7, v6 ; 5E0C0D07 V_MUL_F32_e32 v7, v1, v4 ; 100E0901 V_MAD_F32 v7, v5, v14, v7, 0, 0 ; D2820007 041E1D05 V_MUL_F32_e32 v7, v7, v10 ; 100E1507 V_MUL_F32_e32 v0, v0, v4 ; 10000900 V_MAD_F32 v0, v5, v13, v0, 0, 0 ; D2820000 04021B05 V_MUL_F32_e32 v0, v0, v11 ; 10001700 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v7 ; 5E000F00 EXP 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v4 ; 100A0804 EXP 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_ADD_F32_e32 v4, s4, v0 ; 06080004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v1 ; 06000204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x19 ; C2028119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[9] DCL CONST[0..6] DCL TEMP[0] DCL TEMP[1..9], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[6].xyxx, CONST[6].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].y, IMM[0].yyyy 33: ADD TEMP[1].x, IMM[0].zzzz, -CONST[3].xxxx 34: MOV TEMP[3].x, TEMP[1].xxxx 35: FSNE TEMP[2].x, CONST[1].xxxx, -CONST[1].xxxx 36: UIF TEMP[2].xxxx :2 37: FSLT TEMP[2].x, IMM[0].yyyy, CONST[2].xxxx 38: UIF TEMP[2].xxxx :2 39: MUL TEMP[2].zw, CONST[4].xyxy, IN[2].xyxy 40: MOV TEMP[3].zw, TEMP[2].wwzw 41: FRC TEMP[2].xy, TEMP[2].zwzw 42: ADD TEMP[4].zw, TEMP[3], -TEMP[2].xyxy 43: MOV TEMP[3].zw, TEMP[4].wwzw 44: MAD TEMP[4].xy, IN[2], CONST[4], -TEMP[4].zwzw 45: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 46: RCP TEMP[6].x, CONST[4].xxxx 47: RCP TEMP[7].x, CONST[4].yyyy 48: MOV TEMP[6].y, TEMP[7].xxxx 49: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 50: MOV TEMP[5].xy, TEMP[5].xyyy 51: MOV TEMP[5].w, IMM[0].xxxx 52: TXL TEMP[5], TEMP[5], SAMP[0], 2D 53: MOV TEMP[6].zw, TEMP[5] 54: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 55: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 56: FRC TEMP[8].w, TEMP[7].xxxx 57: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 58: MOV TEMP[3].z, TEMP[7].zzzz 59: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].yyyy 60: FRC TEMP[7].z, TEMP[5].wwww 61: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 62: MOV TEMP[3].w, TEMP[5].wwww 63: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 64: FSGE TEMP[7].x, TEMP[1].xxxx, IMM[0].xxxx 65: UIF TEMP[7].xxxx :2 66: MOV TEMP[7].x, TEMP[5].wwww 67: ELSE :2 68: MOV TEMP[7].x, TEMP[5].zzzz 69: ENDIF 70: RCP TEMP[5].x, CONST[2].xxxx 71: MAD TEMP[5].w, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].yyyy 72: FRC TEMP[8].z, TEMP[5].wwww 73: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[8].zzzz 74: MUL TEMP[8].z, TEMP[5].wwww, TEMP[7].xxxx 75: MOV TEMP[9].x, -TEMP[5].wwww 76: FSGE TEMP[8].x, TEMP[8].zzzz, IMM[0].xxxx 77: UIF TEMP[8].xxxx :2 78: MOV TEMP[8].x, TEMP[5].wwww 79: ELSE :2 80: MOV TEMP[8].x, TEMP[9].xxxx 81: ENDIF 82: MOV TEMP[2].z, TEMP[8].xxxx 83: RCP TEMP[9].x, TEMP[8].xxxx 84: MUL TEMP[9].w, TEMP[7].xxxx, TEMP[9].xxxx 85: FRC TEMP[9].w, TEMP[9].wwww 86: MOV TEMP[2].w, TEMP[9].wwww 87: MUL TEMP[8].x, TEMP[9].wwww, TEMP[8].xxxx 88: MOV TEMP[6].x, TEMP[8].xxxx 89: RCP TEMP[5].x, TEMP[5].wwww 90: MUL TEMP[5].y, TEMP[5].xxxx, TEMP[7].xxxx 91: MOV TEMP[6].y, TEMP[5].yyyy 92: FRC TEMP[5].zw, TEMP[6].xyxy 93: MOV TEMP[3].zw, TEMP[5].wwzw 94: ADD TEMP[5].zw, -TEMP[3], TEMP[6].xyxy 95: MOV TEMP[3].zw, TEMP[5].wwzw 96: ADD TEMP[4].zw, TEMP[4].xyxy, TEMP[3] 97: MOV TEMP[3].zw, TEMP[4].wwzw 98: MUL TEMP[4].zw, TEMP[3], CONST[2].xxxx 99: MOV TEMP[3].zw, TEMP[4].wwzw 100: FRC TEMP[4].xy, TEMP[4].zwzw 101: ADD TEMP[4].zw, TEMP[3], -TEMP[4].xyxy 102: MOV TEMP[3].zw, TEMP[4].wwzw 103: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 104: MOV TEMP[3].w, TEMP[4].wwzw 105: RCP TEMP[2].x, CONST[5].xxxx 106: RCP TEMP[5].x, CONST[5].yyyy 107: MOV TEMP[2].y, TEMP[5].xxxx 108: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[2] 109: MOV TEMP[4].xy, TEMP[4].xyyy 110: MOV TEMP[4].w, IMM[0].xxxx 111: TXL TEMP[4], TEMP[4], SAMP[1], 2D 112: MOV TEMP[2], TEMP[4] 113: MOV TEMP[3].z, TEMP[4].wwww 114: ELSE :2 115: MOV TEMP[4].xy, IN[2].xyyy 116: TEX TEMP[4], TEMP[4], SAMP[1], 2D 117: MOV TEMP[2], TEMP[4] 118: MOV TEMP[3].z, TEMP[4].wwww 119: ENDIF 120: MUL TEMP[4].w, TEMP[3].zzzz, IN[1].wwww 121: MOV TEMP[4].w, TEMP[4].wwww 122: MOV TEMP[4].xyz, IN[1].xyzx 123: ELSE :2 124: FSLT TEMP[5].x, IMM[0].yyyy, CONST[2].xxxx 125: UIF TEMP[5].xxxx :2 126: MUL TEMP[5].zw, CONST[4].xyxy, IN[2].xyxy 127: MOV TEMP[3].zw, TEMP[5].wwzw 128: FRC TEMP[5].xy, TEMP[5].zwzw 129: MOV TEMP[2].xy, TEMP[5].xyxx 130: ADD TEMP[5].zw, TEMP[3], -TEMP[5].xyxy 131: MOV TEMP[3].zw, TEMP[5].wwzw 132: MAD TEMP[5].xy, IN[2], CONST[4], -TEMP[5].zwzw 133: MOV TEMP[2].xy, TEMP[5].xyxx 134: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 135: RCP TEMP[6].x, CONST[4].xxxx 136: RCP TEMP[7].x, CONST[4].yyyy 137: MOV TEMP[6].y, TEMP[7].xxxx 138: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 139: MOV TEMP[5].xy, TEMP[5].xyyy 140: MOV TEMP[5].w, IMM[0].xxxx 141: TXL TEMP[5], TEMP[5], SAMP[0], 2D 142: MOV TEMP[6].zw, TEMP[5] 143: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 144: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 145: FRC TEMP[8].w, TEMP[7].xxxx 146: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 147: MOV TEMP[3].z, TEMP[7].zzzz 148: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].yyyy 149: FRC TEMP[7].z, TEMP[5].wwww 150: MOV TEMP[2].z, TEMP[7].zzzz 151: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 152: MOV TEMP[3].w, TEMP[5].wwww 153: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 154: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 155: UIF TEMP[1].xxxx :2 156: MOV TEMP[1].x, TEMP[5].wwww 157: ELSE :2 158: MOV TEMP[1].x, TEMP[5].zzzz 159: ENDIF 160: MOV TEMP[3].x, TEMP[1].xxxx 161: RCP TEMP[5].x, CONST[2].xxxx 162: MAD TEMP[5].y, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].yyyy 163: FRC TEMP[7].z, TEMP[5].yyyy 164: ADD TEMP[5].y, -TEMP[7].zzzz, TEMP[5].yyyy 165: MUL TEMP[7].z, TEMP[5].yyyy, TEMP[1].xxxx 166: MOV TEMP[8].x, -TEMP[5].yyyy 167: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].xxxx 168: UIF TEMP[7].xxxx :2 169: MOV TEMP[7].x, TEMP[5].yyyy 170: ELSE :2 171: MOV TEMP[7].x, TEMP[8].xxxx 172: ENDIF 173: MOV TEMP[3].z, TEMP[7].xxxx 174: RCP TEMP[7].x, TEMP[7].xxxx 175: MUL TEMP[1].w, TEMP[7].xxxx, TEMP[1].xxxx 176: FRC TEMP[1].w, TEMP[1].wwww 177: MOV TEMP[3].w, TEMP[1].wwww 178: RCP TEMP[1].x, TEMP[5].yyyy 179: MOV TEMP[3].y, TEMP[1].xxxx 180: MUL TEMP[1].xy, TEMP[3].wyzw, TEMP[3].zxzw 181: MOV TEMP[6].xy, TEMP[1].xyxx 182: FRC TEMP[1].xy, TEMP[6] 183: MOV TEMP[3].xy, TEMP[1].xyxx 184: ADD TEMP[1].xy, -TEMP[3], TEMP[6] 185: MOV TEMP[3].xy, TEMP[1].xyxx 186: ADD TEMP[1].xy, TEMP[2], TEMP[3] 187: MOV TEMP[3].xy, TEMP[1].xyxx 188: MUL TEMP[1].xy, TEMP[3], CONST[2].xxxx 189: MOV TEMP[3].xy, TEMP[1].xyxx 190: FRC TEMP[1].zw, TEMP[1].xyxy 191: MOV TEMP[3].zw, TEMP[1].wwzw 192: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[3] 193: MOV TEMP[3].xy, TEMP[1].xyxx 194: ADD TEMP[1].xy, TEMP[3], IMM[0].yyyy 195: MOV TEMP[3].xy, TEMP[1].xyxx 196: RCP TEMP[2].x, CONST[5].xxxx 197: RCP TEMP[1].x, CONST[5].yyyy 198: MOV TEMP[2].y, TEMP[1].xxxx 199: MUL TEMP[1].xy, TEMP[3], TEMP[2] 200: MOV TEMP[1].xy, TEMP[1].xyyy 201: MOV TEMP[1].w, IMM[0].xxxx 202: TXL TEMP[1], TEMP[1], SAMP[1], 2D 203: MOV TEMP[3].xyz, TEMP[1] 204: MOV TEMP[2].x, TEMP[1].wwww 205: ELSE :2 206: MOV TEMP[1].xy, IN[2].xyyy 207: TEX TEMP[1], TEMP[1], SAMP[1], 2D 208: MOV TEMP[3].xyz, TEMP[1] 209: MOV TEMP[2].x, TEMP[1].wwww 210: ENDIF 211: MUL TEMP[1].w, TEMP[2].xxxx, IN[1].wwww 212: MOV TEMP[3].w, TEMP[1].wwww 213: MOV TEMP[4], TEMP[3] 214: ENDIF 215: MOV OUT[0], TEMP[4] 216: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %41 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %42 = load <8 x i32> addrspace(2)* %41, !tbaa !0 %43 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %44 = load <4 x i32> addrspace(2)* %43, !tbaa !0 %45 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %46 = load <8 x i32> addrspace(2)* %45, !tbaa !0 %47 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %48 = load <4 x i32> addrspace(2)* %47, !tbaa !0 %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %55 = fmul float %15, %39 %56 = fadd float %55, %40 %57 = fmul float %14, %35 %58 = fadd float %57, %37 %59 = fmul float %56, %36 %60 = fadd float %59, %38 %61 = fsub float -0.000000e+00, %24 %62 = fsub float -0.000000e+00, %26 %63 = fadd float %61, %58 %64 = fadd float %61, %58 %65 = fadd float %61, %58 %66 = fcmp olt float %63, 0.000000e+00 %67 = sext i1 %66 to i32 %68 = fcmp olt float %64, 0.000000e+00 %69 = sext i1 %68 to i32 %70 = fcmp olt float %65, 0.000000e+00 %71 = sext i1 %70 to i32 %72 = bitcast i32 %67 to float %73 = bitcast i32 %69 to float %74 = bitcast i32 %71 to float %75 = bitcast float %72 to i32 %76 = bitcast float %74 to i32 %77 = or i32 %75, %76 %78 = bitcast i32 %77 to float %79 = bitcast float %78 to i32 %80 = bitcast float %73 to i32 %81 = or i32 %79, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = icmp ne i32 %83, 0 br i1 %84, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %85 = fsub float -0.000000e+00, %58 %86 = fadd float %25, %85 %87 = fsub float -0.000000e+00, %58 %88 = fadd float %25, %87 %89 = fsub float -0.000000e+00, %58 %90 = fadd float %25, %89 %91 = fcmp olt float %86, 0.000000e+00 %92 = sext i1 %91 to i32 %93 = fcmp olt float %88, 0.000000e+00 %94 = sext i1 %93 to i32 %95 = fcmp olt float %90, 0.000000e+00 %96 = sext i1 %95 to i32 %97 = bitcast i32 %92 to float %98 = bitcast i32 %94 to float %99 = bitcast i32 %96 to float %100 = bitcast float %97 to i32 %101 = bitcast float %99 to i32 %102 = or i32 %100, %101 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = bitcast float %98 to i32 %106 = or i32 %104, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = icmp ne i32 %108, 0 br i1 %109, label %IF41, label %ENDIF40 IF41: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF40 ENDIF40: ; preds = %ENDIF, %IF41 %110 = fadd float %62, %60 %111 = fadd float %62, %60 %112 = fadd float %62, %60 %113 = fcmp olt float %110, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %111, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = fcmp olt float %112, 0.000000e+00 %118 = sext i1 %117 to i32 %119 = bitcast i32 %114 to float %120 = bitcast i32 %116 to float %121 = bitcast i32 %118 to float %122 = bitcast float %119 to i32 %123 = bitcast float %121 to i32 %124 = or i32 %122, %123 %125 = bitcast i32 %124 to float %126 = bitcast float %125 to i32 %127 = bitcast float %120 to i32 %128 = or i32 %126, %127 %129 = bitcast i32 %128 to float %130 = bitcast float %129 to i32 %131 = icmp ne i32 %130, 0 br i1 %131, label %IF44, label %ENDIF43 IF44: ; preds = %ENDIF40 call void @llvm.AMDGPU.kilp() br label %ENDIF43 ENDIF43: ; preds = %ENDIF40, %IF44 %132 = fsub float -0.000000e+00, %60 %133 = fadd float %27, %132 %134 = fsub float -0.000000e+00, %60 %135 = fadd float %27, %134 %136 = fsub float -0.000000e+00, %60 %137 = fadd float %27, %136 %138 = fcmp olt float %133, 0.000000e+00 %139 = sext i1 %138 to i32 %140 = fcmp olt float %135, 0.000000e+00 %141 = sext i1 %140 to i32 %142 = fcmp olt float %137, 0.000000e+00 %143 = sext i1 %142 to i32 %144 = bitcast i32 %139 to float %145 = bitcast i32 %141 to float %146 = bitcast i32 %143 to float %147 = bitcast float %144 to i32 %148 = bitcast float %146 to i32 %149 = or i32 %147, %148 %150 = bitcast i32 %149 to float %151 = bitcast float %150 to i32 %152 = bitcast float %145 to i32 %153 = or i32 %151, %152 %154 = bitcast i32 %153 to float %155 = bitcast float %154 to i32 %156 = icmp ne i32 %155, 0 br i1 %156, label %IF47, label %ENDIF46 IF47: ; preds = %ENDIF43 call void @llvm.AMDGPU.kilp() br label %ENDIF46 ENDIF46: ; preds = %ENDIF43, %IF47 %157 = fsub float -0.000000e+00, %30 %158 = fadd float 1.024000e+03, %157 %159 = fsub float -0.000000e+00, %28 %160 = fcmp une float %28, %159 %161 = sext i1 %160 to i32 %162 = bitcast i32 %161 to float %163 = bitcast float %162 to i32 %164 = icmp ne i32 %163, 0 %165 = fcmp olt float 5.000000e-01, %29 %166 = sext i1 %165 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 br i1 %164, label %IF50, label %ELSE51 IF50: ; preds = %ENDIF46 br i1 %169, label %IF53, label %ELSE54 ELSE51: ; preds = %ENDIF46 br i1 %169, label %IF62, label %ELSE63 ENDIF49: ; preds = %IF62, %ELSE63, %IF53, %ELSE54 %.sink72.sink = phi <4 x float> [ %284, %ELSE54 ], [ %277, %IF53 ], [ %396, %ELSE63 ], [ %386, %IF62 ] %temp16.0 = phi float [ %49, %ELSE54 ], [ %49, %IF53 ], [ %387, %IF62 ], [ %397, %ELSE63 ] %temp17.0 = phi float [ %50, %ELSE54 ], [ %50, %IF53 ], [ %388, %IF62 ], [ %398, %ELSE63 ] %temp18.0 = phi float [ %51, %ELSE54 ], [ %51, %IF53 ], [ %389, %IF62 ], [ %399, %ELSE63 ] %170 = extractelement <4 x float> %.sink72.sink, i32 3 %171 = fmul float %170, %52 %172 = call i32 @llvm.SI.packf16(float %temp16.0, float %temp17.0) %173 = bitcast i32 %172 to float %174 = call i32 @llvm.SI.packf16(float %temp18.0, float %171) %175 = bitcast i32 %174 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %173, float %175, float %173, float %175) ret void IF53: ; preds = %IF50 %176 = fmul float %31, %53 %177 = fmul float %32, %54 %178 = call float @llvm.AMDIL.fraction.(float %176) %179 = call float @llvm.AMDIL.fraction.(float %177) %180 = fsub float -0.000000e+00, %178 %181 = fadd float %176, %180 %182 = fsub float -0.000000e+00, %179 %183 = fadd float %177, %182 %184 = fsub float -0.000000e+00, %181 %185 = fmul float %53, %31 %186 = fadd float %185, %184 %187 = fsub float -0.000000e+00, %183 %188 = fmul float %54, %32 %189 = fadd float %188, %187 %190 = fadd float %181, 5.000000e-01 %191 = fadd float %183, 5.000000e-01 %192 = fdiv float 1.000000e+00, %31 %193 = fdiv float 1.000000e+00, %32 %194 = fmul float %190, %192 %195 = fmul float %191, %193 %196 = bitcast float %194 to i32 %197 = bitcast float %195 to i32 %198 = bitcast float 0.000000e+00 to i32 %199 = insertelement <4 x i32> undef, i32 %196, i32 0 %200 = insertelement <4 x i32> %199, i32 %197, i32 1 %201 = insertelement <4 x i32> %200, i32 %198, i32 2 %202 = insertelement <4 x i32> %201, i32 undef, i32 3 %203 = bitcast <8 x i32> %42 to <32 x i8> %204 = bitcast <4 x i32> %44 to <16 x i8> %205 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %202, <32 x i8> %203, <16 x i8> %204, i32 2) %206 = extractelement <4 x float> %205, i32 0 %207 = extractelement <4 x float> %205, i32 3 %208 = fmul float %207, 6.528000e+04 %209 = fmul float %206, 2.550000e+02 %210 = fadd float %208, %209 %211 = fadd float %210, 5.000000e-01 %212 = call float @llvm.AMDIL.fraction.(float %211) %213 = fsub float -0.000000e+00, %212 %214 = fadd float %213, %211 %215 = fmul float %30, %206 %216 = fadd float %215, 5.000000e-01 %217 = call float @llvm.AMDIL.fraction.(float %216) %218 = fsub float -0.000000e+00, %217 %219 = fadd float %216, %218 %220 = fadd float %214, 5.000000e-01 %221 = fadd float %219, 5.000000e-01 %222 = fcmp oge float %158, 0.000000e+00 %223 = sext i1 %222 to i32 %224 = bitcast i32 %223 to float %225 = bitcast float %224 to i32 %226 = icmp ne i32 %225, 0 %. = select i1 %226, float %221, float %220 %227 = fdiv float 1.000000e+00, %29 %228 = fmul float %33, %227 %229 = fadd float %228, 5.000000e-01 %230 = call float @llvm.AMDIL.fraction.(float %229) %231 = fsub float -0.000000e+00, %230 %232 = fadd float %229, %231 %233 = fmul float %232, %. %234 = fsub float -0.000000e+00, %232 %235 = fcmp oge float %233, 0.000000e+00 %236 = sext i1 %235 to i32 %237 = bitcast i32 %236 to float %238 = bitcast float %237 to i32 %239 = icmp ne i32 %238, 0 %temp32.0 = select i1 %239, float %232, float %234 %240 = fdiv float 1.000000e+00, %temp32.0 %241 = fmul float %., %240 %242 = call float @llvm.AMDIL.fraction.(float %241) %243 = fmul float %242, %temp32.0 %244 = fdiv float 1.000000e+00, %232 %245 = fmul float %244, %. %246 = call float @llvm.AMDIL.fraction.(float %243) %247 = call float @llvm.AMDIL.fraction.(float %245) %248 = fsub float -0.000000e+00, %246 %249 = fadd float %248, %243 %250 = fsub float -0.000000e+00, %247 %251 = fadd float %250, %245 %252 = fadd float %186, %249 %253 = fadd float %189, %251 %254 = fmul float %252, %29 %255 = fmul float %253, %29 %256 = call float @llvm.AMDIL.fraction.(float %254) %257 = call float @llvm.AMDIL.fraction.(float %255) %258 = fsub float -0.000000e+00, %256 %259 = fadd float %254, %258 %260 = fsub float -0.000000e+00, %257 %261 = fadd float %255, %260 %262 = fadd float %259, 5.000000e-01 %263 = fadd float %261, 5.000000e-01 %264 = fdiv float 1.000000e+00, %33 %265 = fdiv float 1.000000e+00, %34 %266 = fmul float %262, %264 %267 = fmul float %263, %265 %268 = bitcast float %266 to i32 %269 = bitcast float %267 to i32 %270 = bitcast float 0.000000e+00 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = bitcast <8 x i32> %46 to <32 x i8> %276 = bitcast <4 x i32> %48 to <16 x i8> %277 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 2) br label %ENDIF49 ELSE54: ; preds = %IF50 %278 = bitcast float %53 to i32 %279 = bitcast float %54 to i32 %280 = insertelement <2 x i32> undef, i32 %278, i32 0 %281 = insertelement <2 x i32> %280, i32 %279, i32 1 %282 = bitcast <8 x i32> %46 to <32 x i8> %283 = bitcast <4 x i32> %48 to <16 x i8> %284 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %281, <32 x i8> %282, <16 x i8> %283, i32 2) br label %ENDIF49 IF62: ; preds = %ELSE51 %285 = fmul float %31, %53 %286 = fmul float %32, %54 %287 = call float @llvm.AMDIL.fraction.(float %285) %288 = call float @llvm.AMDIL.fraction.(float %286) %289 = fsub float -0.000000e+00, %287 %290 = fadd float %285, %289 %291 = fsub float -0.000000e+00, %288 %292 = fadd float %286, %291 %293 = fsub float -0.000000e+00, %290 %294 = fmul float %53, %31 %295 = fadd float %294, %293 %296 = fsub float -0.000000e+00, %292 %297 = fmul float %54, %32 %298 = fadd float %297, %296 %299 = fadd float %290, 5.000000e-01 %300 = fadd float %292, 5.000000e-01 %301 = fdiv float 1.000000e+00, %31 %302 = fdiv float 1.000000e+00, %32 %303 = fmul float %299, %301 %304 = fmul float %300, %302 %305 = bitcast float %303 to i32 %306 = bitcast float %304 to i32 %307 = bitcast float 0.000000e+00 to i32 %308 = insertelement <4 x i32> undef, i32 %305, i32 0 %309 = insertelement <4 x i32> %308, i32 %306, i32 1 %310 = insertelement <4 x i32> %309, i32 %307, i32 2 %311 = insertelement <4 x i32> %310, i32 undef, i32 3 %312 = bitcast <8 x i32> %42 to <32 x i8> %313 = bitcast <4 x i32> %44 to <16 x i8> %314 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %311, <32 x i8> %312, <16 x i8> %313, i32 2) %315 = extractelement <4 x float> %314, i32 0 %316 = extractelement <4 x float> %314, i32 3 %317 = fmul float %316, 6.528000e+04 %318 = fmul float %315, 2.550000e+02 %319 = fadd float %317, %318 %320 = fadd float %319, 5.000000e-01 %321 = call float @llvm.AMDIL.fraction.(float %320) %322 = fsub float -0.000000e+00, %321 %323 = fadd float %322, %320 %324 = fmul float %30, %315 %325 = fadd float %324, 5.000000e-01 %326 = call float @llvm.AMDIL.fraction.(float %325) %327 = fsub float -0.000000e+00, %326 %328 = fadd float %325, %327 %329 = fadd float %323, 5.000000e-01 %330 = fadd float %328, 5.000000e-01 %331 = fcmp oge float %158, 0.000000e+00 %332 = sext i1 %331 to i32 %333 = bitcast i32 %332 to float %334 = bitcast float %333 to i32 %335 = icmp ne i32 %334, 0 %.71 = select i1 %335, float %330, float %329 %336 = fdiv float 1.000000e+00, %29 %337 = fmul float %33, %336 %338 = fadd float %337, 5.000000e-01 %339 = call float @llvm.AMDIL.fraction.(float %338) %340 = fsub float -0.000000e+00, %339 %341 = fadd float %340, %338 %342 = fmul float %341, %.71 %343 = fsub float -0.000000e+00, %341 %344 = fcmp oge float %342, 0.000000e+00 %345 = sext i1 %344 to i32 %346 = bitcast i32 %345 to float %347 = bitcast float %346 to i32 %348 = icmp ne i32 %347, 0 %temp28.1 = select i1 %348, float %341, float %343 %349 = fdiv float 1.000000e+00, %temp28.1 %350 = fmul float %349, %.71 %351 = call float @llvm.AMDIL.fraction.(float %350) %352 = fdiv float 1.000000e+00, %341 %353 = fmul float %351, %temp28.1 %354 = fmul float %352, %.71 %355 = call float @llvm.AMDIL.fraction.(float %353) %356 = call float @llvm.AMDIL.fraction.(float %354) %357 = fsub float -0.000000e+00, %355 %358 = fadd float %357, %353 %359 = fsub float -0.000000e+00, %356 %360 = fadd float %359, %354 %361 = fadd float %295, %358 %362 = fadd float %298, %360 %363 = fmul float %361, %29 %364 = fmul float %362, %29 %365 = call float @llvm.AMDIL.fraction.(float %363) %366 = call float @llvm.AMDIL.fraction.(float %364) %367 = fsub float -0.000000e+00, %365 %368 = fadd float %367, %363 %369 = fsub float -0.000000e+00, %366 %370 = fadd float %369, %364 %371 = fadd float %368, 5.000000e-01 %372 = fadd float %370, 5.000000e-01 %373 = fdiv float 1.000000e+00, %33 %374 = fdiv float 1.000000e+00, %34 %375 = fmul float %371, %373 %376 = fmul float %372, %374 %377 = bitcast float %375 to i32 %378 = bitcast float %376 to i32 %379 = bitcast float 0.000000e+00 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = insertelement <4 x i32> %382, i32 undef, i32 3 %384 = bitcast <8 x i32> %46 to <32 x i8> %385 = bitcast <4 x i32> %48 to <16 x i8> %386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 2) %387 = extractelement <4 x float> %386, i32 0 %388 = extractelement <4 x float> %386, i32 1 %389 = extractelement <4 x float> %386, i32 2 br label %ENDIF49 ELSE63: ; preds = %ELSE51 %390 = bitcast float %53 to i32 %391 = bitcast float %54 to i32 %392 = insertelement <2 x i32> undef, i32 %390, i32 0 %393 = insertelement <2 x i32> %392, i32 %391, i32 1 %394 = bitcast <8 x i32> %46 to <32 x i8> %395 = bitcast <4 x i32> %48 to <16 x i8> %396 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %393, <32 x i8> %394, <16 x i8> %395, i32 2) %397 = extractelement <4 x float> %396, i32 0 %398 = extractelement <4 x float> %396, i32 1 %399 = extractelement <4 x float> %396, i32 2 br label %ENDIF49 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v9, v0, 1, 1, [m0] ; C8240500 V_INTERP_P2_F32 v9, [v9], v1, 1, 1, [m0] ; C8250501 V_INTERP_P1_F32 v8, v0, 0, 1, [m0] ; C8200400 V_INTERP_P2_F32 v8, [v8], v1, 0, 1, [m0] ; C8210401 V_INTERP_P1_F32 v4, v0, 3, 0, [m0] ; C8100300 V_INTERP_P2_F32 v4, [v4], v1, 3, 0, [m0] ; C8110301 V_INTERP_P1_F32 v5, v0, 2, 0, [m0] ; C8140200 V_INTERP_P2_F32 v5, [v5], v1, 2, 0, [m0] ; C8150201 V_INTERP_P1_F32 v6, v0, 1, 0, [m0] ; C8180100 V_INTERP_P2_F32 v6, [v6], v1, 1, 0, [m0] ; C8190101 V_INTERP_P1_F32 v7, v0, 0, 0, [m0] ; C81C0000 V_INTERP_P2_F32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x18 ; C2040118 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x1a ; C204811A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s9 ; 7E000209 V_MAD_F32 v0, v2, s8, v0, 0, 0 ; D2820000 04001102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s8, v0 ; 0A020008 V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[14:15], v1, 0, 0, 0 ; D10A000E 00010101 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x25 ; C2040125 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x24 ; C2048124 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x1b ; C205011B S_BUFFER_LOAD_DWORD s12, s[0:3], 0x19 ; C2060119 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x2 ; C2068102 S_BUFFER_LOAD_DWORD s11, s[0:3], 0x1 ; C2058101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s11 ; 7E02020B S_AND_SAVEEXEC_B64 s[14:15], s[14:15] ; BE8E240E S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s11, s[0:3], 0x3 ; C2058103 V_MOV_B32_e32 v2, s8 ; 7E040208 V_MOV_B32_e32 v11, s9 ; 7E160209 V_MOV_B32_e32 v10, s10 ; 7E14020A V_MOV_B32_e32 v13, s12 ; 7E1A020C V_MOV_B32_e32 v12, s13 ; 7E18020D S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v0, v1, v0 ; 08000101 V_CMP_LT_F32_e64 s[8:9], v0, 0.000000e+00, 0, 0 ; D0020008 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000000 00218280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[8:9], v0, 0, 0, 0 ; D10A0008 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[12:13], s[8:9] ; BE8C2408 S_XOR_B64 s[12:13], exec, s[12:13] ; 898C0C7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[12:13] ; 88FE0C7E S_BUFFER_LOAD_DWORD s8, s[0:3], 0xc ; C204010C S_BUFFER_LOAD_DWORD s9, s[0:3], 0x8 ; C2048108 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x4 ; C2050104 V_MOV_B32_e32 v1, s11 ; 7E02020B S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MAD_F32 v0, v3, v11, v2, 0, 0 ; D2820000 040A1703 V_MAD_F32 v2, v0, v13, v10, 0, 0 ; D2820002 042A1B00 V_SUB_F32_e32 v0, v2, v12 ; 08001902 V_CMP_LT_F32_e64 s[12:13], v0, 0.000000e+00, 0, 0 ; D002000C 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[12:13], 0, 0, 0, 0 ; D2000000 00318280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[12:13], v0, 0, 0, 0 ; D10A000C 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[14:15], s[12:13] ; BE8E240C S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s11, s[0:3], 0x15 ; C2058115 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x14 ; C2060114 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x11 ; C2068111 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x10 ; C2000110 V_MOV_B32_e32 v3, s8 ; 7E060208 V_MOV_B32_e32 v0, s9 ; 7E000209 V_MOV_B32_e32 v10, s10 ; 7E14020A S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v1, v1, v2 ; 08020501 V_CMP_LT_F32_e64 s[2:3], v1, 0.000000e+00, 0, 0 ; D0020002 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[2:3], 0, 0, 0, 0 ; D2000001 00098280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[2:3], v1, 0, 0, 0 ; D10A0002 00010101 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[2:3], s[2:3] ; BE822402 S_XOR_B64 s[2:3], exec, s[2:3] ; 8982027E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[2:3] ; 88FE027E V_MOV_B32_e32 v1, s11 ; 7E02020B V_MOV_B32_e32 v2, s12 ; 7E04020C V_MOV_B32_e32 v12, s13 ; 7E18020D V_MOV_B32_e32 v11, s0 ; 7E160200 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MOV_B32_e32 v13, 0x80000000 ; 7E1A02FF 80000000 V_XOR_B32_e32 v13, v10, v13 ; 3A1A1B0A V_CMP_NEQ_F32_e32 vcc, v10, v13 ; 7C1A1B0A V_CNDMASK_B32_e64 v10, 0, -1, vcc, 0, 0, 0, 0 ; D200000A 01A98280 V_CMP_EQ_I32_e64 s[28:29], v10, 0, 0, 0 ; D104001C 0001010A V_SUB_F32_e32 v10, 1.024000e+03, v3 ; 081406FF 44800000 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_LOAD_DWORDX4 s[16:19], s[4:5], 0x0 ; C0880500 S_LOAD_DWORDX8 s[20:27], s[6:7], 0x0 ; C0CA0700 V_CMP_GT_F32_e64 s[4:5], v0, 5.000000e-01, 0, 0 ; D0080004 0001E100 V_CNDMASK_B32_e64 v13, 0, -1, s[4:5], 0, 0, 0, 0 ; D200000D 00118280 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[4:5], s[28:29] ; BE84241C S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_9 ; BF880000 V_CMP_EQ_I32_e64 s[6:7], v13, 0, 0, 0 ; D1040006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E IMAGE_SAMPLE v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08 S_WAITCNT vmcnt(0) ; BF8C0770 V_MOV_B32_e32 v18, v14 ; 7E24030E V_MOV_B32_e32 v19, v15 ; 7E26030F V_MOV_B32_e32 v20, v16 ; 7E280310 V_MOV_B32_e32 v21, v17 ; 7E2A0311 S_OR_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862506 S_XOR_B64 exec, exec, s[6:7] ; 89FE067E S_CBRANCH_EXECZ BB0_12 ; BF880000 V_MUL_F32_e32 v14, v9, v12 ; 101C1909 V_FRACT_F32_e32 v15, v14 ; 7E1E410E V_SUB_F32_e32 v15, v14, v15 ; 081E1F0E V_ADD_F32_e32 v16, 5.000000e-01, v15 ; 06201EF0 V_RCP_F32_e32 v17, v12 ; 7E22550C V_MUL_F32_e32 v17, v16, v17 ; 10222310 V_MUL_F32_e32 v20, v8, v11 ; 10281708 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v21, v20, v21 ; 082A2B14 V_ADD_F32_e32 v22, 5.000000e-01, v21 ; 062C2AF0 V_RCP_F32_e32 v23, v11 ; 7E2E550B V_MUL_F32_e32 v16, v22, v23 ; 10202F16 V_MOV_B32_e32 v18, 0 ; 7E240280 IMAGE_SAMPLE_L v[22:23], 9, 0, 0, 0, 0, 0, 0, 0, v[16:19], s[20:27], s[16:19] ; F0900900 00851610 V_MOV_B32_e32 v24, 2.550000e+02 ; 7E3002FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v24, v22, v24 ; 10303116 V_MOV_B32_e32 v25, 6.528000e+04 ; 7E3202FF 477F0000 V_MAD_F32 v24, v23, v25, v24, 0, 0 ; D2820018 04623317 V_ADD_F32_e32 v24, 5.000000e-01, v24 ; 063030F0 V_FRACT_F32_e32 v25, v24 ; 7E324118 V_SUB_F32_e32 v24, v24, v25 ; 08303318 V_ADD_F32_e32 v24, 5.000000e-01, v24 ; 063030F0 V_MAD_F32 v22, v3, v22, 5.000000e-01, 0, 0 ; D2820016 03C22D03 V_FRACT_F32_e32 v23, v22 ; 7E2E4116 V_SUB_F32_e32 v22, v22, v23 ; 082C2F16 V_ADD_F32_e32 v22, 5.000000e-01, v22 ; 062C2CF0 V_CMP_GE_F32_e64 s[28:29], v10, 0.000000e+00, 0, 0 ; D00C001C 0001010A V_CNDMASK_B32_e64 v22, v24, v22, s[28:29], 0, 0, 0, 0 ; D2000016 00722D18 V_RCP_F32_e32 v23, v0 ; 7E2E5500 V_MAD_F32 v23, v2, v23, 5.000000e-01, 0, 0 ; D2820017 03C22F02 V_FRACT_F32_e32 v24, v23 ; 7E304117 V_SUB_F32_e32 v23, v23, v24 ; 082E3117 V_MUL_F32_e32 v24, v23, v22 ; 10302D17 V_CMP_GE_F32_e64 s[28:29], v24, 0.000000e+00, 0, 0 ; D00C001C 00010118 V_MOV_B32_e32 v24, 0x80000000 ; 7E3002FF 80000000 V_XOR_B32_e32 v24, v23, v24 ; 3A303117 V_CNDMASK_B32_e64 v24, v24, v23, s[28:29], 0, 0, 0, 0 ; D2000018 00722F18 V_RCP_F32_e32 v25, v24 ; 7E325518 V_MUL_F32_e32 v25, v25, v22 ; 10322D19 V_FRACT_F32_e32 v25, v25 ; 7E324119 V_MUL_F32_e32 v24, v25, v24 ; 10303119 V_FRACT_F32_e32 v25, v24 ; 7E324118 V_SUB_F32_e32 v24, v24, v25 ; 08303318 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_ADD_F32_e32 v20, v20, v24 ; 06283114 V_MUL_F32_e32 v20, v20, v0 ; 10280114 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_ADD_F32_e32 v20, 5.000000e-01, v20 ; 062828F0 V_RCP_F32_e32 v21, v2 ; 7E2A5502 V_MUL_F32_e32 v16, v20, v21 ; 10202B14 V_RCP_F32_e32 v20, v23 ; 7E285517 V_MUL_F32_e32 v20, v20, v22 ; 10282D14 V_FRACT_F32_e32 v21, v20 ; 7E2A4114 V_SUB_F32_e32 v20, v20, v21 ; 08282B14 V_SUB_F32_e32 v14, v14, v15 ; 081C1F0E V_ADD_F32_e32 v14, v14, v20 ; 061C290E V_MUL_F32_e32 v14, v14, v0 ; 101C010E V_FRACT_F32_e32 v15, v14 ; 7E1E410E V_SUB_F32_e32 v14, v14, v15 ; 081C1F0E V_ADD_F32_e32 v14, 5.000000e-01, v14 ; 061C1CF0 V_RCP_F32_e32 v15, v1 ; 7E1E5501 V_MUL_F32_e32 v17, v14, v15 ; 10221F0E IMAGE_SAMPLE_L v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[16:19], s[8:15], s[0:3] ; F0900F00 00020E10 S_WAITCNT vmcnt(0) ; BF8C0770 V_MOV_B32_e32 v18, v14 ; 7E24030E V_MOV_B32_e32 v19, v15 ; 7E26030F V_MOV_B32_e32 v20, v16 ; 7E280310 V_MOV_B32_e32 v21, v17 ; 7E2A0311 S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E S_CBRANCH_EXECZ BB0_14 ; BF880000 V_CMP_EQ_I32_e64 s[6:7], v13, 0, 0, 0 ; D1040006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E IMAGE_SAMPLE v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00021208 V_MOV_B32_e32 v16, v5 ; 7E200305 V_MOV_B32_e32 v15, v6 ; 7E1E0306 V_MOV_B32_e32 v14, v7 ; 7E1C0307 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862506 S_WAITCNT expcnt(0) ; BF8C070F S_XOR_B64 exec, exec, s[6:7] ; 89FE067E S_CBRANCH_EXECZ BB0_13 ; BF880000 V_MUL_F32_e32 v13, v9, v12 ; 101A1909 V_FRACT_F32_e32 v14, v13 ; 7E1C410D V_SUB_F32_e32 v14, v13, v14 ; 081C1D0D V_ADD_F32_e32 v15, 5.000000e-01, v14 ; 061E1CF0 V_RCP_F32_e32 v12, v12 ; 7E18550C V_MUL_F32_e32 v16, v15, v12 ; 1020190F V_MUL_F32_e32 v8, v8, v11 ; 10101708 V_FRACT_F32_e32 v9, v8 ; 7E124108 V_SUB_F32_e32 v9, v8, v9 ; 08121308 V_ADD_F32_e32 v12, 5.000000e-01, v9 ; 061812F0 V_RCP_F32_e32 v11, v11 ; 7E16550B V_MUL_F32_e32 v15, v12, v11 ; 101E170C V_MOV_B32_e32 v17, 0 ; 7E220280 IMAGE_SAMPLE_L v[11:12], 9, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[20:27], s[16:19] ; F0900900 00850B0F V_MOV_B32_e32 v19, 2.550000e+02 ; 7E2602FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v19, v11, v19 ; 1026270B V_MOV_B32_e32 v20, 6.528000e+04 ; 7E2802FF 477F0000 V_MAD_F32 v19, v12, v20, v19, 0, 0 ; D2820013 044E290C V_ADD_F32_e32 v19, 5.000000e-01, v19 ; 062626F0 V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_ADD_F32_e32 v19, 5.000000e-01, v19 ; 062626F0 V_MAD_F32 v3, v3, v11, 5.000000e-01, 0, 0 ; D2820003 03C21703 V_FRACT_F32_e32 v11, v3 ; 7E164103 V_SUB_F32_e32 v3, v3, v11 ; 08061703 V_ADD_F32_e32 v3, 5.000000e-01, v3 ; 060606F0 V_CMP_GE_F32_e64 s[28:29], v10, 0.000000e+00, 0, 0 ; D00C001C 0001010A V_CNDMASK_B32_e64 v3, v19, v3, s[28:29], 0, 0, 0, 0 ; D2000003 00720713 V_RCP_F32_e32 v10, v0 ; 7E145500 V_MAD_F32 v10, v2, v10, 5.000000e-01, 0, 0 ; D282000A 03C21502 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v3 ; 1016070A V_CMP_GE_F32_e64 s[28:29], v11, 0.000000e+00, 0, 0 ; D00C001C 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[28:29], 0, 0, 0, 0 ; D200000B 0072150B V_RCP_F32_e32 v12, v11 ; 7E18550B V_MUL_F32_e32 v12, v3, v12 ; 10181903 V_FRACT_F32_e32 v12, v12 ; 7E18410C V_MUL_F32_e32 v11, v12, v11 ; 1016170C V_FRACT_F32_e32 v12, v11 ; 7E18410B V_SUB_F32_e32 v11, v11, v12 ; 0816190B V_SUB_F32_e32 v8, v8, v9 ; 08101308 V_ADD_F32_e32 v8, v8, v11 ; 06101708 V_MUL_F32_e32 v8, v8, v0 ; 10100108 V_FRACT_F32_e32 v9, v8 ; 7E124108 V_SUB_F32_e32 v8, v8, v9 ; 08101308 V_ADD_F32_e32 v8, 5.000000e-01, v8 ; 061010F0 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v15, v8, v2 ; 101E0508 V_RCP_F32_e32 v2, v10 ; 7E04550A V_MUL_F32_e32 v2, v2, v3 ; 10040702 V_FRACT_F32_e32 v3, v2 ; 7E064102 V_SUB_F32_e32 v2, v2, v3 ; 08040702 V_SUB_F32_e32 v3, v13, v14 ; 08061D0D V_ADD_F32_e32 v2, v3, v2 ; 06040503 V_MUL_F32_e32 v0, v2, v0 ; 10000102 V_FRACT_F32_e32 v2, v0 ; 7E044100 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v16, v0, v1 ; 10200300 IMAGE_SAMPLE_L v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[8:15], s[0:3] ; F0900F00 0002120F V_MOV_B32_e32 v16, v5 ; 7E200305 V_MOV_B32_e32 v15, v6 ; 7E1E0306 V_MOV_B32_e32 v14, v7 ; 7E1C0307 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_PKRTZ_F16_F32_e32 v0, v14, v15 ; 5E001F0E V_MUL_F32_e32 v1, v21, v4 ; 10020915 V_CVT_PKRTZ_F16_F32_e32 v1, v16, v1 ; 5E020310 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) %158 = extractelement <4 x float> %157, i32 0 %159 = extractelement <4 x float> %157, i32 1 %160 = extractelement <4 x float> %157, i32 2 br label %ENDIF ELSE: ; preds = %main_body %161 = bitcast float %42 to i32 %162 = bitcast float %43 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = bitcast <8 x i32> %35 to <32 x i8> %166 = bitcast <4 x i32> %37 to <16 x i8> %167 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %165, <16 x i8> %166, i32 2) %168 = extractelement <4 x float> %167, i32 0 %169 = extractelement <4 x float> %167, i32 1 %170 = extractelement <4 x float> %167, i32 2 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %167, %ELSE ], [ %157, %IF ] %temp2.0 = phi float [ %160, %IF ], [ %170, %ELSE ] %temp1.0 = phi float [ %159, %IF ], [ %169, %ELSE ] %temp.0 = phi float [ %158, %IF ], [ %168, %ELSE ] %171 = extractelement <4 x float> %.sink, i32 3 %172 = fmul float %temp.0, %38 %173 = fmul float %temp1.0, %39 %174 = fmul float %temp2.0, %40 %175 = fmul float %171, %41 %176 = call i32 @llvm.SI.packf16(float %172, float %173) %177 = bitcast i32 %176 to float %178 = call i32 @llvm.SI.packf16(float %174, float %175) %179 = bitcast i32 %178 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %177, float %179, float %177, float %179) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v7, v0, 1, 1, [m0] ; C81C0500 V_INTERP_P2_F32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s20, s[0:3], 0x0 ; C20A0100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[22:23], s20, 5.000000e-01, 0, 0 ; D0080016 0001E014 V_CNDMASK_B32_e64 v0, 0, -1, s[22:23], 0, 0, 0, 0 ; D2000000 00598280 V_CMP_EQ_I32_e64 s[22:23], v0, 0, 0, 0 ; D1040016 00010100 S_AND_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962416 S_XOR_B64 s[22:23], exec, s[22:23] ; 8996167E IMAGE_SAMPLE v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800F00 00430806 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962516 S_XOR_B64 exec, exec, s[22:23] ; 89FE167E S_CBRANCH_EXECZ BB0_4 ; BF880000 S_BUFFER_LOAD_DWORD s21, s[0:3], 0xd ; C20A810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_BUFFER_LOAD_DWORD s25, s[0:3], 0x9 ; C20C8109 S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_BUFFER_LOAD_DWORD s27, s[0:3], 0x4 ; C20D8104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s21 ; 7E000215 V_MOV_B32_e32 v1, s24 ; 7E020218 V_MOV_B32_e32 v8, s25 ; 7E100219 V_MOV_B32_e32 v9, s26 ; 7E12021A V_MOV_B32_e32 v10, s27 ; 7E14021B V_MOV_B32_e32 v11, s20 ; 7E160214 V_MUL_F32_e32 v12, v7, v8 ; 10181107 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v8, v8 ; 7E105508 V_MUL_F32_e32 v15, v14, v8 ; 101E110E V_MUL_F32_e32 v6, v6, v9 ; 100C1306 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v7, v6, v7 ; 080E0F06 V_ADD_F32_e32 v8, 5.000000e-01, v7 ; 06100EF0 V_RCP_F32_e32 v9, v9 ; 7E125509 V_MUL_F32_e32 v14, v8, v9 ; 101C1308 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[8:9], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7080E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v9, v18, 5.000000e-01, 0, 0 ; D2820013 03C22509 V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v8, v18, v19, 0, 0 ; D2820012 044E2508 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v8, v10, v8, 5.000000e-01, 0, 0 ; D2820008 03C2110A V_FRACT_F32_e32 v9, v8 ; 7E124108 V_SUB_F32_e32 v8, v8, v9 ; 08101308 V_ADD_F32_e32 v8, 5.000000e-01, v8 ; 061010F0 V_SUB_F32_e32 v9, 1.024000e+03, v10 ; 081214FF 44800000 V_CMP_GE_F32_e64 s[24:25], v9, 0.000000e+00, 0, 0 ; D00C0018 00010109 V_CNDMASK_B32_e64 v8, v18, v8, s[24:25], 0, 0, 0, 0 ; D2000008 00621112 V_RCP_F32_e32 v9, v11 ; 7E12550B V_MAD_F32 v9, v1, v9, 5.000000e-01, 0, 0 ; D2820009 03C21301 V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_MUL_F32_e32 v10, v9, v8 ; 10141109 V_CMP_GE_F32_e64 s[24:25], v10, 0.000000e+00, 0, 0 ; D00C0018 0001010A V_MOV_B32_e32 v10, 0x80000000 ; 7E1402FF 80000000 V_XOR_B32_e32 v10, v9, v10 ; 3A141509 V_CNDMASK_B32_e64 v10, v10, v9, s[24:25], 0, 0, 0, 0 ; D200000A 0062130A V_RCP_F32_e32 v18, v10 ; 7E24550A V_MUL_F32_e32 v18, v18, v8 ; 10241112 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v10, v18, v10 ; 10141512 V_FRACT_F32_e32 v18, v10 ; 7E24410A V_SUB_F32_e32 v10, v10, v18 ; 0814250A V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, v6, v10 ; 060C1506 V_MUL_F32_e32 v6, v6, v11 ; 100C1706 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_ADD_F32_e32 v6, 5.000000e-01, v6 ; 060C0CF0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v14, v6, v1 ; 101C0306 V_RCP_F32_e32 v1, v9 ; 7E025509 V_MUL_F32_e32 v1, v1, v8 ; 10021101 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_SUB_F32_e32 v6, v12, v13 ; 080C1B0C V_ADD_F32_e32 v1, v6, v1 ; 06020306 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v15, v1, v0 ; 101E0101 IMAGE_SAMPLE_L v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 0043080E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[22:23] ; 88FE167E V_MUL_F32_e32 v0, v9, v4 ; 10000909 V_MUL_F32_e32 v1, v8, v5 ; 10020B08 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v10, v3 ; 1002070A V_MUL_F32_e32 v2, v11, v2 ; 1004050B V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v5 ; F80008CF 05020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..3] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 17.0000, 3721.0000} IMM[1] FLT32 { 13.0000, 930.2500, 1860.5000, -0.5000} IMM[2] FLT32 { -0.0000, -1.0000, -6.0000, 6.0000} IMM[3] FLT32 { -2.0000, 3.0000, 0.5000, -1.0000} IMM[4] FLT32 { -1.0000, 1.0000, 0.0000, 0.5000} IMM[5] FLT32 { 1.0000, -1.0000, -2.0000, 2.0000} IMM[6] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, CONST[2].xxxx 1: MUL TEMP[0], TEMP[0].xxxx, CONST[0].yyxx 2: FRC TEMP[1], TEMP[0].yyww 3: MOV TEMP[2].w, TEMP[1].wwww 4: ADD TEMP[0], TEMP[0], -TEMP[1] 5: RCP TEMP[2].x, TEMP[0].yyyy 6: MUL TEMP[1].yz, TEMP[0].xyww, IN[0].xyxw 7: MOV TEMP[2].yz, TEMP[1].zyzz 8: FRC TEMP[1].xy, TEMP[2].zyzw 9: MOV TEMP[3].xy, TEMP[1].xyxx 10: MAD TEMP[4], IN[0].yyxx, TEMP[0], -TEMP[1].yyxx 11: RCP TEMP[0].x, TEMP[0].wwww 12: ADD TEMP[4], TEMP[4], IMM[0].xyxy 13: MUL TEMP[5], TEMP[4].xyxy, IMM[0].zzzz 14: MUL TEMP[2], TEMP[2].xxxx, TEMP[5] 15: FRC TEMP[6], TEMP[2] 16: MUL TEMP[2], TEMP[6], TEMP[6] 17: MAD TEMP[2], TEMP[2], IMM[0].wwww, TEMP[4].zzww 18: MUL TEMP[2], TEMP[2], IMM[1].xxxx 19: MUL TEMP[0], TEMP[0].xxxx, TEMP[2] 20: FRC TEMP[6], TEMP[0] 21: MUL TEMP[0], TEMP[6], TEMP[6] 22: MUL TEMP[2], TEMP[0], IMM[1].yyyy 23: MUL TEMP[0], TEMP[0], IMM[1].zzzz 24: FRC TEMP[6], TEMP[0] 25: ADD TEMP[0], TEMP[6], IMM[1].wwww 26: FRC TEMP[6], TEMP[2] 27: ADD TEMP[2], TEMP[6], IMM[1].wwww 28: MOV TEMP[6], -TEMP[2] 29: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 30: UIF TEMP[7].xxxx :0 31: MOV TEMP[7].x, IMM[0].xxxx 32: ELSE :0 33: MOV TEMP[7].x, IMM[0].yyyy 34: ENDIF 35: MOV TEMP[7].x, TEMP[7].xxxx 36: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 37: UIF TEMP[8].xxxx :0 38: MOV TEMP[8].x, IMM[0].xxxx 39: ELSE :0 40: MOV TEMP[8].x, IMM[0].yyyy 41: ENDIF 42: MOV TEMP[7].y, TEMP[8].xxxx 43: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 44: UIF TEMP[8].xxxx :0 45: MOV TEMP[8].x, IMM[0].xxxx 46: ELSE :0 47: MOV TEMP[8].x, IMM[0].yyyy 48: ENDIF 49: MOV TEMP[7].z, TEMP[8].xxxx 50: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 51: UIF TEMP[6].xxxx :0 52: MOV TEMP[6].x, IMM[0].xxxx 53: ELSE :0 54: MOV TEMP[6].x, IMM[0].yyyy 55: ENDIF 56: MOV TEMP[7].w, TEMP[6].xxxx 57: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[0].xxxx 58: UIF TEMP[6].xxxx :0 59: MOV TEMP[6].x, IMM[2].xxxx 60: ELSE :0 61: MOV TEMP[6].x, IMM[2].yyyy 62: ENDIF 63: MOV TEMP[6].x, TEMP[6].xxxx 64: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[0].xxxx 65: UIF TEMP[8].xxxx :0 66: MOV TEMP[8].x, IMM[2].xxxx 67: ELSE :0 68: MOV TEMP[8].x, IMM[2].yyyy 69: ENDIF 70: MOV TEMP[6].y, TEMP[8].xxxx 71: FSGE TEMP[8].x, TEMP[2].zzzz, IMM[0].xxxx 72: UIF TEMP[8].xxxx :0 73: MOV TEMP[8].x, IMM[2].xxxx 74: ELSE :0 75: MOV TEMP[8].x, IMM[2].yyyy 76: ENDIF 77: MOV TEMP[6].z, TEMP[8].xxxx 78: FSGE TEMP[8].x, TEMP[2].wwww, IMM[0].xxxx 79: UIF TEMP[8].xxxx :0 80: MOV TEMP[8].x, IMM[2].xxxx 81: ELSE :0 82: MOV TEMP[8].x, IMM[2].yyyy 83: ENDIF 84: MOV TEMP[6].w, TEMP[8].xxxx 85: ADD TEMP[2], TEMP[6], TEMP[7] 86: ADD TEMP[4], TEMP[1].xxyy, IMM[2].xyxy 87: MUL TEMP[5], TEMP[2], TEMP[4].zwzw 88: MOV TEMP[6], -TEMP[0] 89: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 90: UIF TEMP[7].xxxx :0 91: MOV TEMP[7].x, IMM[0].xxxx 92: ELSE :0 93: MOV TEMP[7].x, IMM[0].yyyy 94: ENDIF 95: MOV TEMP[7].x, TEMP[7].xxxx 96: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 97: UIF TEMP[8].xxxx :0 98: MOV TEMP[8].x, IMM[0].xxxx 99: ELSE :0 100: MOV TEMP[8].x, IMM[0].yyyy 101: ENDIF 102: MOV TEMP[7].y, TEMP[8].xxxx 103: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 104: UIF TEMP[8].xxxx :0 105: MOV TEMP[8].x, IMM[0].xxxx 106: ELSE :0 107: MOV TEMP[8].x, IMM[0].yyyy 108: ENDIF 109: MOV TEMP[7].z, TEMP[8].xxxx 110: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 111: UIF TEMP[6].xxxx :0 112: MOV TEMP[6].x, IMM[0].xxxx 113: ELSE :0 114: MOV TEMP[6].x, IMM[0].yyyy 115: ENDIF 116: MOV TEMP[7].w, TEMP[6].xxxx 117: FSGE TEMP[6].x, TEMP[0].xxxx, IMM[0].xxxx 118: UIF TEMP[6].xxxx :0 119: MOV TEMP[6].x, IMM[2].xxxx 120: ELSE :0 121: MOV TEMP[6].x, IMM[2].yyyy 122: ENDIF 123: MOV TEMP[6].x, TEMP[6].xxxx 124: FSGE TEMP[8].x, TEMP[0].yyyy, IMM[0].xxxx 125: UIF TEMP[8].xxxx :0 126: MOV TEMP[8].x, IMM[2].xxxx 127: ELSE :0 128: MOV TEMP[8].x, IMM[2].yyyy 129: ENDIF 130: MOV TEMP[6].y, TEMP[8].xxxx 131: FSGE TEMP[8].x, TEMP[0].zzzz, IMM[0].xxxx 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[8].x, IMM[2].xxxx 134: ELSE :0 135: MOV TEMP[8].x, IMM[2].yyyy 136: ENDIF 137: MOV TEMP[6].z, TEMP[8].xxxx 138: FSGE TEMP[8].x, TEMP[0].wwww, IMM[0].xxxx 139: UIF TEMP[8].xxxx :0 140: MOV TEMP[8].x, IMM[2].xxxx 141: ELSE :0 142: MOV TEMP[8].x, IMM[2].yyyy 143: ENDIF 144: MOV TEMP[6].w, TEMP[8].xxxx 145: ADD TEMP[0], TEMP[6], TEMP[7] 146: MAD TEMP[4], TEMP[4].xxyy, TEMP[0], TEMP[5] 147: ADD TEMP[4], -TEMP[4].xyxz, TEMP[4].zwyw 148: MAD TEMP[6].zw, TEMP[1].xyxy, IMM[2].zzzz, IMM[2].wwww 149: MOV TEMP[3].zw, TEMP[6].wwzw 150: MUL TEMP[5], TEMP[1].xyxy, TEMP[3] 151: MAD TEMP[3], TEMP[1].xyxx, IMM[3].xxxx, IMM[3].yyyy 152: MUL TEMP[3], TEMP[3], TEMP[5].xyxx 153: LRP TEMP[6].xy, TEMP[3].wwww, TEMP[0].zwzw, TEMP[0] 154: MOV TEMP[1].xy, TEMP[6].xyxx 155: LRP TEMP[6].zw, TEMP[3].yyyy, TEMP[2].xyyw, TEMP[2].xyxz 156: MOV TEMP[1].zw, TEMP[6].wwzw 157: MAD TEMP[0], TEMP[4], TEMP[5].zzww, TEMP[1] 158: LRP TEMP[1], TEMP[3], TEMP[0].wyww, TEMP[0].zxzz 159: MOV TEMP[2].z, TEMP[1].zyzz 160: MUL TEMP[0], TEMP[1], CONST[1].xxxx 161: MUL TEMP[0], TEMP[0], IMM[3].zzzz 162: MAD TEMP[0], TEMP[0], IMM[4].xyzz, IMM[4].wwzy 163: MOV TEMP[2].w, IMM[0].xxxx 164: RCP TEMP[2].x, CONST[0].xxxx 165: ADD TEMP[1].xy, TEMP[2].xwzw, IN[0] 166: MOV TEMP[1].xy, TEMP[1].xyyy 167: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 168: MOV TEMP[3].x, TEMP[1].xxxw 169: MOV TEMP[2].y, -TEMP[2].xxxx 170: MOV TEMP[4].yw, TEMP[2].xyxx 171: RCP TEMP[4].x, CONST[0].yyyy 172: ADD TEMP[1].xy, TEMP[4].yxzw, IN[0] 173: ADD TEMP[6].yw, TEMP[4].xwzx, IN[0].xxzy 174: MOV TEMP[6].xy, TEMP[6].ywww 175: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 176: MOV TEMP[3].z, TEMP[6].xxxx 177: MOV TEMP[1].xy, TEMP[1].xyyy 178: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 179: MOV TEMP[3].y, TEMP[1].xxxx 180: DP3 TEMP[1].x, TEMP[3].yzxx, IMM[5].xyzz 181: ADD TEMP[6].yw, TEMP[2], IN[0].xxzy 182: MOV TEMP[6].xy, TEMP[6].ywww 183: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 184: MOV TEMP[5].z, TEMP[6].xxxx 185: MOV TEMP[2].z, -TEMP[4].xxxx 186: ADD TEMP[6].yw, TEMP[2].xxzz, IN[0].xxzy 187: MOV TEMP[4].yw, TEMP[6].wyww 188: MOV TEMP[6].xy, TEMP[6].ywww 189: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 190: MOV TEMP[5].y, TEMP[6].xxxx 191: ADD TEMP[6].xy, TEMP[2].yzzw, IN[0] 192: MOV TEMP[2].xy, TEMP[6].xyxx 193: ADD TEMP[7].zw, TEMP[2].xywz, IN[0].xyxy 194: MOV TEMP[7].xy, TEMP[7].zwww 195: TEX TEMP[7].x, TEMP[7], SAMP[0], 2D 196: MOV TEMP[5].w, TEMP[7].xxxx 197: MOV TEMP[6].xy, TEMP[6].xyyy 198: TEX TEMP[6].xw, TEMP[6], SAMP[0], 2D 199: MOV TEMP[2].w, TEMP[6].wwww 200: MOV TEMP[5].x, TEMP[6].xxxx 201: DP3 TEMP[6].x, TEMP[5].xyww, IMM[5].xxww 202: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[5].xyww 203: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx 204: MOV TEMP[5].x, TEMP[1].xxxx 205: MOV TEMP[4].z, IMM[0].xxxx 206: ADD TEMP[1].xz, TEMP[4].zyxw, IN[0].xyyw 207: MOV TEMP[1].xy, TEMP[1].xzzz 208: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 209: MOV TEMP[3].w, TEMP[1].xxxx 210: DP3 TEMP[1].x, TEMP[3].yzww, IMM[3].wwxx 211: ADD TEMP[1].y, TEMP[1].xxxx, TEMP[6].xxxx 212: MOV TEMP[5].y, TEMP[1].yyyy 213: MUL TEMP[1].xy, TEMP[5], CONST[3].xxxx 214: MOV TEMP[2].xy, TEMP[1].xyxx 215: MUL TEMP[4].xyz, TEMP[2].yxyw, IMM[3].zzzz 216: DP2 TEMP[2].x, TEMP[1].xyyy, TEMP[1].xyyy 217: MAX TEMP[1].x, TEMP[2].xxxx, IMM[6].xxxx 218: RSQ TEMP[5].x, TEMP[1].xxxx 219: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx 220: CMP TEMP[2].x, -TEMP[1].xxxx, TEMP[5].xxxx, IMM[0].xxxx 221: MAD TEMP[1].yzw, TEMP[4].xxyz, IMM[4].xxyz, IMM[4].xwwz 222: MOV TEMP[2].yzw, TEMP[1].zyzw 223: ADD TEMP[3], -TEMP[0], TEMP[2].yzwx 224: MOV_SAT TEMP[1].x, TEMP[2].xxxx 225: MAD TEMP[0], TEMP[1].xxxx, TEMP[3], TEMP[0] 226: MOV OUT[0], TEMP[0] 227: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fdiv float 1.000000e+00, %27 %36 = fmul float %35, %25 %37 = fmul float %35, %25 %38 = fmul float %35, %24 %39 = fmul float %35, %24 %40 = call float @llvm.AMDIL.fraction.(float %37) %41 = call float @llvm.AMDIL.fraction.(float %37) %42 = call float @llvm.AMDIL.fraction.(float %39) %43 = call float @llvm.AMDIL.fraction.(float %39) %44 = fsub float -0.000000e+00, %40 %45 = fadd float %36, %44 %46 = fsub float -0.000000e+00, %41 %47 = fadd float %37, %46 %48 = fsub float -0.000000e+00, %42 %49 = fadd float %38, %48 %50 = fsub float -0.000000e+00, %43 %51 = fadd float %39, %50 %52 = fdiv float 1.000000e+00, %47 %53 = fmul float %47, %34 %54 = fmul float %51, %33 %55 = call float @llvm.AMDIL.fraction.(float %54) %56 = call float @llvm.AMDIL.fraction.(float %53) %57 = fsub float -0.000000e+00, %56 %58 = fmul float %34, %45 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %34, %47 %62 = fadd float %61, %60 %63 = fsub float -0.000000e+00, %55 %64 = fmul float %33, %49 %65 = fadd float %64, %63 %66 = fsub float -0.000000e+00, %55 %67 = fmul float %33, %51 %68 = fadd float %67, %66 %69 = fdiv float 1.000000e+00, %51 %70 = fadd float %59, 0.000000e+00 %71 = fadd float %62, 1.000000e+00 %72 = fadd float %65, 0.000000e+00 %73 = fadd float %68, 1.000000e+00 %74 = fmul float %70, 1.700000e+01 %75 = fmul float %71, 1.700000e+01 %76 = fmul float %70, 1.700000e+01 %77 = fmul float %71, 1.700000e+01 %78 = fmul float %52, %74 %79 = fmul float %52, %75 %80 = fmul float %52, %76 %81 = fmul float %52, %77 %82 = call float @llvm.AMDIL.fraction.(float %78) %83 = call float @llvm.AMDIL.fraction.(float %79) %84 = call float @llvm.AMDIL.fraction.(float %80) %85 = call float @llvm.AMDIL.fraction.(float %81) %86 = fmul float %82, %82 %87 = fmul float %83, %83 %88 = fmul float %84, %84 %89 = fmul float %85, %85 %90 = fmul float %86, 3.721000e+03 %91 = fadd float %90, %72 %92 = fmul float %87, 3.721000e+03 %93 = fadd float %92, %72 %94 = fmul float %88, 3.721000e+03 %95 = fadd float %94, %73 %96 = fmul float %89, 3.721000e+03 %97 = fadd float %96, %73 %98 = fmul float %91, 1.300000e+01 %99 = fmul float %93, 1.300000e+01 %100 = fmul float %95, 1.300000e+01 %101 = fmul float %97, 1.300000e+01 %102 = fmul float %69, %98 %103 = fmul float %69, %99 %104 = fmul float %69, %100 %105 = fmul float %69, %101 %106 = call float @llvm.AMDIL.fraction.(float %102) %107 = call float @llvm.AMDIL.fraction.(float %103) %108 = call float @llvm.AMDIL.fraction.(float %104) %109 = call float @llvm.AMDIL.fraction.(float %105) %110 = fmul float %106, %106 %111 = fmul float %107, %107 %112 = fmul float %108, %108 %113 = fmul float %109, %109 %114 = fmul float %110, 9.302500e+02 %115 = fmul float %111, 9.302500e+02 %116 = fmul float %112, 9.302500e+02 %117 = fmul float %113, 9.302500e+02 %118 = fmul float %110, 1.860500e+03 %119 = fmul float %111, 1.860500e+03 %120 = fmul float %112, 1.860500e+03 %121 = fmul float %113, 1.860500e+03 %122 = call float @llvm.AMDIL.fraction.(float %118) %123 = call float @llvm.AMDIL.fraction.(float %119) %124 = call float @llvm.AMDIL.fraction.(float %120) %125 = call float @llvm.AMDIL.fraction.(float %121) %126 = fadd float %122, -5.000000e-01 %127 = fadd float %123, -5.000000e-01 %128 = fadd float %124, -5.000000e-01 %129 = fadd float %125, -5.000000e-01 %130 = call float @llvm.AMDIL.fraction.(float %114) %131 = call float @llvm.AMDIL.fraction.(float %115) %132 = call float @llvm.AMDIL.fraction.(float %116) %133 = call float @llvm.AMDIL.fraction.(float %117) %134 = fadd float %130, -5.000000e-01 %135 = fadd float %131, -5.000000e-01 %136 = fadd float %132, -5.000000e-01 %137 = fadd float %133, -5.000000e-01 %138 = fsub float -0.000000e+00, %134 %139 = fsub float -0.000000e+00, %135 %140 = fsub float -0.000000e+00, %136 %141 = fsub float -0.000000e+00, %137 %142 = fcmp oge float %138, 0.000000e+00 %143 = sext i1 %142 to i32 %144 = bitcast i32 %143 to float %145 = bitcast float %144 to i32 %146 = icmp ne i32 %145, 0 %. = select i1 %146, float 0.000000e+00, float 1.000000e+00 %147 = fcmp oge float %139, 0.000000e+00 %148 = sext i1 %147 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = icmp ne i32 %150, 0 %temp32.0 = select i1 %151, float 0.000000e+00, float 1.000000e+00 %152 = fcmp oge float %140, 0.000000e+00 %153 = sext i1 %152 to i32 %154 = bitcast i32 %153 to float %155 = bitcast float %154 to i32 %156 = icmp ne i32 %155, 0 %.81 = select i1 %156, float 0.000000e+00, float 1.000000e+00 %157 = fcmp oge float %141, 0.000000e+00 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 %temp24.0 = select i1 %161, float 0.000000e+00, float 1.000000e+00 %162 = fcmp oge float %134, 0.000000e+00 %163 = sext i1 %162 to i32 %164 = bitcast i32 %163 to float %165 = bitcast float %164 to i32 %166 = icmp ne i32 %165, 0 %.82 = select i1 %166, float -0.000000e+00, float -1.000000e+00 %167 = fcmp oge float %135, 0.000000e+00 %168 = sext i1 %167 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = icmp ne i32 %170, 0 %temp32.2 = select i1 %171, float -0.000000e+00, float -1.000000e+00 %172 = fcmp oge float %136, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = bitcast i32 %173 to float %175 = bitcast float %174 to i32 %176 = icmp ne i32 %175, 0 %.83 = select i1 %176, float -0.000000e+00, float -1.000000e+00 %177 = fcmp oge float %137, 0.000000e+00 %178 = sext i1 %177 to i32 %179 = bitcast i32 %178 to float %180 = bitcast float %179 to i32 %181 = icmp ne i32 %180, 0 %temp32.4 = select i1 %181, float -0.000000e+00, float -1.000000e+00 %182 = fadd float %.82, %. %183 = fadd float %temp32.2, %temp32.0 %184 = fadd float %.83, %.81 %185 = fadd float %temp32.4, %temp24.0 %186 = fadd float %55, -0.000000e+00 %187 = fadd float %55, -1.000000e+00 %188 = fadd float %56, -0.000000e+00 %189 = fadd float %56, -1.000000e+00 %190 = fmul float %182, %188 %191 = fmul float %183, %189 %192 = fmul float %184, %188 %193 = fmul float %185, %189 %194 = fsub float -0.000000e+00, %126 %195 = fsub float -0.000000e+00, %127 %196 = fsub float -0.000000e+00, %128 %197 = fsub float -0.000000e+00, %129 %198 = fcmp oge float %194, 0.000000e+00 %199 = sext i1 %198 to i32 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = icmp ne i32 %201, 0 %.84 = select i1 %202, float 0.000000e+00, float 1.000000e+00 %203 = fcmp oge float %195, 0.000000e+00 %204 = sext i1 %203 to i32 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = icmp ne i32 %206, 0 %temp32.5 = select i1 %207, float 0.000000e+00, float 1.000000e+00 %208 = fcmp oge float %196, 0.000000e+00 %209 = sext i1 %208 to i32 %210 = bitcast i32 %209 to float %211 = bitcast float %210 to i32 %212 = icmp ne i32 %211, 0 %.85 = select i1 %212, float 0.000000e+00, float 1.000000e+00 %213 = fcmp oge float %197, 0.000000e+00 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 %temp24.2 = select i1 %217, float 0.000000e+00, float 1.000000e+00 %218 = fcmp oge float %126, 0.000000e+00 %219 = sext i1 %218 to i32 %220 = bitcast i32 %219 to float %221 = bitcast float %220 to i32 %222 = icmp ne i32 %221, 0 %.86 = select i1 %222, float -0.000000e+00, float -1.000000e+00 %223 = fcmp oge float %127, 0.000000e+00 %224 = sext i1 %223 to i32 %225 = bitcast i32 %224 to float %226 = bitcast float %225 to i32 %227 = icmp ne i32 %226, 0 %temp32.7 = select i1 %227, float -0.000000e+00, float -1.000000e+00 %228 = fcmp oge float %128, 0.000000e+00 %229 = sext i1 %228 to i32 %230 = bitcast i32 %229 to float %231 = bitcast float %230 to i32 %232 = icmp ne i32 %231, 0 %.87 = select i1 %232, float -0.000000e+00, float -1.000000e+00 %233 = fcmp oge float %129, 0.000000e+00 %234 = sext i1 %233 to i32 %235 = bitcast i32 %234 to float %236 = bitcast float %235 to i32 %237 = icmp ne i32 %236, 0 %temp32.9 = select i1 %237, float -0.000000e+00, float -1.000000e+00 %238 = fadd float %.86, %.84 %239 = fadd float %temp32.7, %temp32.5 %240 = fadd float %.87, %.85 %241 = fadd float %temp32.9, %temp24.2 %242 = fmul float %186, %238 %243 = fadd float %242, %190 %244 = fmul float %186, %239 %245 = fadd float %244, %191 %246 = fmul float %187, %240 %247 = fadd float %246, %192 %248 = fmul float %187, %241 %249 = fadd float %248, %193 %250 = fsub float -0.000000e+00, %243 %251 = fadd float %250, %247 %252 = fsub float -0.000000e+00, %245 %253 = fadd float %252, %249 %254 = fsub float -0.000000e+00, %243 %255 = fadd float %254, %245 %256 = fsub float -0.000000e+00, %247 %257 = fadd float %256, %249 %258 = fmul float %55, -6.000000e+00 %259 = fadd float %258, 6.000000e+00 %260 = fmul float %56, -6.000000e+00 %261 = fadd float %260, 6.000000e+00 %262 = fmul float %55, %55 %263 = fmul float %56, %56 %264 = fmul float %55, %259 %265 = fmul float %56, %261 %266 = fmul float %55, -2.000000e+00 %267 = fadd float %266, 3.000000e+00 %268 = fmul float %56, -2.000000e+00 %269 = fadd float %268, 3.000000e+00 %270 = fmul float %55, -2.000000e+00 %271 = fadd float %270, 3.000000e+00 %272 = fmul float %55, -2.000000e+00 %273 = fadd float %272, 3.000000e+00 %274 = fmul float %267, %262 %275 = fmul float %269, %263 %276 = fmul float %271, %262 %277 = fmul float %273, %262 %278 = call float @llvm.AMDGPU.lrp(float %277, float %240, float %238) %279 = call float @llvm.AMDGPU.lrp(float %277, float %241, float %239) %280 = call float @llvm.AMDGPU.lrp(float %275, float %183, float %182) %281 = call float @llvm.AMDGPU.lrp(float %275, float %185, float %184) %282 = fmul float %251, %264 %283 = fadd float %282, %278 %284 = fmul float %253, %264 %285 = fadd float %284, %279 %286 = fmul float %255, %265 %287 = fadd float %286, %280 %288 = fmul float %257, %265 %289 = fadd float %288, %281 %290 = call float @llvm.AMDGPU.lrp(float %274, float %289, float %287) %291 = call float @llvm.AMDGPU.lrp(float %275, float %285, float %283) %292 = call float @llvm.AMDGPU.lrp(float %276, float %289, float %287) %293 = call float @llvm.AMDGPU.lrp(float %277, float %289, float %287) %294 = fmul float %290, %26 %295 = fmul float %291, %26 %296 = fmul float %292, %26 %297 = fmul float %293, %26 %298 = fmul float %294, 5.000000e-01 %299 = fmul float %295, 5.000000e-01 %300 = fmul float %296, 5.000000e-01 %301 = fmul float %297, 5.000000e-01 %302 = fmul float %298, -1.000000e+00 %303 = fadd float %302, 5.000000e-01 %304 = fmul float %299, 1.000000e+00 %305 = fadd float %304, 5.000000e-01 %306 = fmul float %300, 0.000000e+00 %307 = fadd float %306, 0.000000e+00 %308 = fmul float %301, 0.000000e+00 %309 = fadd float %308, 1.000000e+00 %310 = fdiv float 1.000000e+00, %24 %311 = fadd float %310, %33 %312 = fadd float 0.000000e+00, %34 %313 = bitcast float %311 to i32 %314 = bitcast float %312 to i32 %315 = insertelement <2 x i32> undef, i32 %313, i32 0 %316 = insertelement <2 x i32> %315, i32 %314, i32 1 %317 = bitcast <8 x i32> %30 to <32 x i8> %318 = bitcast <4 x i32> %32 to <16 x i8> %319 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %316, <32 x i8> %317, <16 x i8> %318, i32 2) %320 = extractelement <4 x float> %319, i32 0 %321 = fsub float -0.000000e+00, %310 %322 = fdiv float 1.000000e+00, %25 %323 = fadd float %321, %33 %324 = fadd float %322, %34 %325 = fadd float %310, %33 %326 = fadd float %322, %34 %327 = bitcast float %325 to i32 %328 = bitcast float %326 to i32 %329 = insertelement <2 x i32> undef, i32 %327, i32 0 %330 = insertelement <2 x i32> %329, i32 %328, i32 1 %331 = bitcast <8 x i32> %30 to <32 x i8> %332 = bitcast <4 x i32> %32 to <16 x i8> %333 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %330, <32 x i8> %331, <16 x i8> %332, i32 2) %334 = extractelement <4 x float> %333, i32 0 %335 = bitcast float %323 to i32 %336 = bitcast float %324 to i32 %337 = insertelement <2 x i32> undef, i32 %335, i32 0 %338 = insertelement <2 x i32> %337, i32 %336, i32 1 %339 = bitcast <8 x i32> %30 to <32 x i8> %340 = bitcast <4 x i32> %32 to <16 x i8> %341 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %338, <32 x i8> %339, <16 x i8> %340, i32 2) %342 = extractelement <4 x float> %341, i32 0 %343 = fmul float %342, 1.000000e+00 %344 = fmul float %334, -1.000000e+00 %345 = fadd float %344, %343 %346 = fmul float %320, -2.000000e+00 %347 = fadd float %345, %346 %348 = fadd float %321, %33 %349 = fadd float 0.000000e+00, %34 %350 = bitcast float %348 to i32 %351 = bitcast float %349 to i32 %352 = insertelement <2 x i32> undef, i32 %350, i32 0 %353 = insertelement <2 x i32> %352, i32 %351, i32 1 %354 = bitcast <8 x i32> %30 to <32 x i8> %355 = bitcast <4 x i32> %32 to <16 x i8> %356 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %353, <32 x i8> %354, <16 x i8> %355, i32 2) %357 = extractelement <4 x float> %356, i32 0 %358 = fsub float -0.000000e+00, %322 %359 = fadd float %310, %33 %360 = fadd float %358, %34 %361 = bitcast float %359 to i32 %362 = bitcast float %360 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %30 to <32 x i8> %366 = bitcast <4 x i32> %32 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = fadd float %321, %33 %370 = fadd float %358, %34 %371 = fadd float 0.000000e+00, %33 %372 = fadd float %358, %34 %373 = bitcast float %371 to i32 %374 = bitcast float %372 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %30 to <32 x i8> %378 = bitcast <4 x i32> %32 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = bitcast float %369 to i32 %382 = bitcast float %370 to i32 %383 = insertelement <2 x i32> undef, i32 %381, i32 0 %384 = insertelement <2 x i32> %383, i32 %382, i32 1 %385 = bitcast <8 x i32> %30 to <32 x i8> %386 = bitcast <4 x i32> %32 to <16 x i8> %387 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %384, <32 x i8> %385, <16 x i8> %386, i32 2) %388 = extractelement <4 x float> %387, i32 0 %389 = fmul float %388, 1.000000e+00 %390 = fmul float %368, 1.000000e+00 %391 = fadd float %390, %389 %392 = fmul float %380, 2.000000e+00 %393 = fadd float %391, %392 %394 = fmul float %388, 1.000000e+00 %395 = fmul float %368, -1.000000e+00 %396 = fadd float %395, %394 %397 = fmul float %357, 2.000000e+00 %398 = fadd float %396, %397 %399 = fadd float %347, %398 %400 = fadd float 0.000000e+00, %33 %401 = fadd float %322, %34 %402 = bitcast float %400 to i32 %403 = bitcast float %401 to i32 %404 = insertelement <2 x i32> undef, i32 %402, i32 0 %405 = insertelement <2 x i32> %404, i32 %403, i32 1 %406 = bitcast <8 x i32> %30 to <32 x i8> %407 = bitcast <4 x i32> %32 to <16 x i8> %408 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %405, <32 x i8> %406, <16 x i8> %407, i32 2) %409 = extractelement <4 x float> %408, i32 0 %410 = fmul float %342, -1.000000e+00 %411 = fmul float %334, -1.000000e+00 %412 = fadd float %411, %410 %413 = fmul float %409, -2.000000e+00 %414 = fadd float %412, %413 %415 = fadd float %414, %393 %416 = fmul float %399, %28 %417 = fmul float %415, %28 %418 = fmul float %417, 5.000000e-01 %419 = fmul float %416, 5.000000e-01 %420 = fmul float %417, 5.000000e-01 %421 = fmul float %416, %416 %422 = fmul float %417, %417 %423 = fadd float %421, %422 %424 = fcmp uge float %423, 0x3E7AD7F2A0000000 %425 = select i1 %424, float %423, float 0x3E7AD7F2A0000000 %426 = call float @llvm.AMDGPU.rsq.clamped.f32(float %425) %427 = fmul float %426, %425 %428 = fsub float -0.000000e+00, %425 %429 = call float @llvm.AMDGPU.cndlt(float %428, float %427, float 0.000000e+00) %430 = fmul float %418, -1.000000e+00 %431 = fadd float %430, 5.000000e-01 %432 = fmul float %419, 1.000000e+00 %433 = fadd float %432, 5.000000e-01 %434 = fmul float %420, 0.000000e+00 %435 = fadd float %434, 0.000000e+00 %436 = fsub float -0.000000e+00, %303 %437 = fadd float %436, %431 %438 = fsub float -0.000000e+00, %305 %439 = fadd float %438, %433 %440 = fsub float -0.000000e+00, %307 %441 = fadd float %440, %435 %442 = fsub float -0.000000e+00, %309 %443 = fadd float %442, %429 %444 = call float @llvm.AMDIL.clamp.(float %429, float 0.000000e+00, float 1.000000e+00) %445 = fmul float %444, %437 %446 = fadd float %445, %303 %447 = fmul float %444, %439 %448 = fadd float %447, %305 %449 = fmul float %444, %441 %450 = fadd float %449, %307 %451 = fmul float %444, %443 %452 = fadd float %451, %309 %453 = call i32 @llvm.SI.packf16(float %446, float %448) %454 = bitcast i32 %453 to float %455 = call i32 @llvm.SI.packf16(float %450, float %452) %456 = bitcast i32 %455 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %454, float %456, float %454, float %456) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x8 ; C2050108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_RCP_F32_e32 v4, s10 ; 7E08540A V_MUL_F32_e32 v5, s8, v4 ; 100A0808 V_FRACT_F32_e32 v6, v5 ; 7E0C4105 V_SUB_F32_e32 v5, v5, v6 ; 080A0D05 V_MUL_F32_e32 v6, v5, v2 ; 100C0505 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_INTERP_P1_F32 v8, v0, 1, 0, [m0] ; C8200100 V_INTERP_P2_F32 v8, [v8], v1, 1, 0, [m0] ; C8210101 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x1 ; C2048101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s9, v4 ; 10000809 V_FRACT_F32_e32 v1, v0 ; 7E024100 V_SUB_F32_e32 v0, v0, v1 ; 08000300 V_MUL_F32_e32 v1, v0, v8 ; 10021100 V_FRACT_F32_e32 v4, v1 ; 7E084101 V_SUB_F32_e32 v1, v1, v4 ; 08020901 V_ADD_F32_e32 v9, 1.000000e+00, v1 ; 061202F2 V_MUL_F32_e32 v9, 1.700000e+01, v9 ; 101212FF 41880000 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v9, v0, v9 ; 10121300 V_FRACT_F32_e32 v9, v9 ; 7E124109 V_MUL_F32_e32 v9, v9, v9 ; 10121309 V_MOV_B32_e32 v10, 3.721000e+03 ; 7E1402FF 45689000 V_MAD_F32 v11, v9, v10, v6, 0, 0 ; D282000B 041A1509 V_MUL_F32_e32 v11, 1.300000e+01, v11 ; 101616FF 41500000 V_RCP_F32_e32 v5, v5 ; 7E0A5505 V_MUL_F32_e32 v11, v5, v11 ; 10161705 V_FRACT_F32_e32 v11, v11 ; 7E16410B V_MUL_F32_e32 v11, v11, v11 ; 1016170B V_MUL_F32_e32 v12, 1.860500e+03, v11 ; 101816FF 44E89000 V_FRACT_F32_e32 v12, v12 ; 7E18410C V_ADD_F32_e32 v12, -5.000000e-01, v12 ; 061818F1 V_CMP_GE_F32_e64 s[10:11], v12, 0.000000e+00, 0, 0 ; D00C000A 0001010C V_MOV_B32_e32 v13, 0x80000000 ; 7E1A02FF 80000000 V_CNDMASK_B32_e64 v14, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D200000E 002A1AF3 V_MOV_B32_e32 v15, 0x80000000 ; 7E1E02FF 80000000 V_XOR_B32_e32 v12, v12, v15 ; 3A181F0C V_CMP_GE_F32_e64 s[10:11], v12, 0.000000e+00, 0, 0 ; D00C000A 0001010C V_CNDMASK_B32_e64 v12, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D200000C 002900F2 V_ADD_F32_e32 v12, v14, v12 ; 0618190E V_MUL_F32_e32 v11, 9.302500e+02, v11 ; 101616FF 44689000 V_FRACT_F32_e32 v11, v11 ; 7E16410B V_ADD_F32_e32 v11, -5.000000e-01, v11 ; 061616F1 V_CMP_GE_F32_e64 s[10:11], v11, 0.000000e+00, 0, 0 ; D00C000A 0001010B V_CNDMASK_B32_e64 v14, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D200000E 002A1AF3 V_XOR_B32_e32 v11, v11, v15 ; 3A161F0B V_CMP_GE_F32_e64 s[10:11], v11, 0.000000e+00, 0, 0 ; D00C000A 0001010B V_CNDMASK_B32_e64 v11, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D200000B 002900F2 V_ADD_F32_e32 v11, v14, v11 ; 0616170E V_ADD_F32_e32 v14, -1.000000e+00, v4 ; 061C08F3 V_MUL_F32_e32 v16, v11, v14 ; 10201D0B V_MAD_F32 v16, v7, v12, v16, 0, 0 ; D2820010 04421907 V_ADD_F32_e32 v17, 1.000000e+00, v6 ; 06220CF2 V_MAD_F32 v9, v9, v10, v17, 0, 0 ; D2820009 04461509 V_MUL_F32_e32 v9, 1.300000e+01, v9 ; 101212FF 41500000 V_MUL_F32_e32 v9, v5, v9 ; 10121305 V_FRACT_F32_e32 v9, v9 ; 7E124109 V_MUL_F32_e32 v9, v9, v9 ; 10121309 V_MUL_F32_e32 v18, 1.860500e+03, v9 ; 102412FF 44E89000 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_ADD_F32_e32 v18, -5.000000e-01, v18 ; 062424F1 V_CMP_GE_F32_e64 s[10:11], v18, 0.000000e+00, 0, 0 ; D00C000A 00010112 V_CNDMASK_B32_e64 v19, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D2000013 002A1AF3 V_XOR_B32_e32 v18, v18, v15 ; 3A241F12 V_CMP_GE_F32_e64 s[10:11], v18, 0.000000e+00, 0, 0 ; D00C000A 00010112 V_CNDMASK_B32_e64 v18, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000012 002900F2 V_ADD_F32_e32 v18, v19, v18 ; 06242513 V_MUL_F32_e32 v9, 9.302500e+02, v9 ; 101212FF 44689000 V_FRACT_F32_e32 v9, v9 ; 7E124109 V_ADD_F32_e32 v9, -5.000000e-01, v9 ; 061212F1 V_CMP_GE_F32_e64 s[10:11], v9, 0.000000e+00, 0, 0 ; D00C000A 00010109 V_CNDMASK_B32_e64 v19, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D2000013 002A1AF3 V_XOR_B32_e32 v9, v9, v15 ; 3A121F09 V_CMP_GE_F32_e64 s[10:11], v9, 0.000000e+00, 0, 0 ; D00C000A 00010109 V_CNDMASK_B32_e64 v9, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000009 002900F2 V_ADD_F32_e32 v9, v19, v9 ; 06121313 V_MUL_F32_e32 v14, v9, v14 ; 101C1D09 V_ADD_F32_e32 v19, -1.000000e+00, v7 ; 06260EF3 V_MAD_F32 v14, v19, v18, v14, 0, 0 ; D282000E 043A2513 V_SUB_F32_e32 v20, v14, v16 ; 0828210E V_MOV_B32_e32 v21, 3.000000e+00 ; 7E2A02FF 40400000 V_MAD_F32 v22, v7, -2.000000e+00, v21, 0, 0 ; D2820016 0455EB07 V_MUL_F32_e32 v23, v7, v7 ; 102E0F07 V_MUL_F32_e32 v22, v22, v23 ; 102C2F16 V_SUB_F32_e32 v23, 1.000000e+00, v22 ; 082E2CF2 V_MUL_F32_e32 v12, v23, v12 ; 10181917 V_MAD_F32 v12, v22, v18, v12, 0, 0 ; D282000C 04322516 V_MOV_B32_e32 v18, 6.000000e+00 ; 7E2402FF 40C00000 V_MOV_B32_e32 v24, -6.000000e+00 ; 7E3002FF C0C00000 V_MAD_F32 v25, v7, v24, v18, 0, 0 ; D2820019 044A3107 V_MUL_F32_e32 v25, v7, v25 ; 10323307 V_MAD_F32 v12, v20, v25, v12, 0, 0 ; D282000C 04323314 V_MUL_F32_e32 v1, 1.700000e+01, v1 ; 100202FF 41880000 V_MUL_F32_e32 v0, v0, v1 ; 10000300 V_FRACT_F32_e32 v0, v0 ; 7E004100 V_MUL_F32_e32 v0, v0, v0 ; 10000100 V_MAD_F32 v1, v0, v10, v6, 0, 0 ; D2820001 041A1500 V_MUL_F32_e32 v1, 1.300000e+01, v1 ; 100202FF 41500000 V_MUL_F32_e32 v1, v5, v1 ; 10020305 V_FRACT_F32_e32 v1, v1 ; 7E024101 V_MUL_F32_e32 v1, v1, v1 ; 10020301 V_MUL_F32_e32 v6, 1.860500e+03, v1 ; 100C02FF 44E89000 V_FRACT_F32_e32 v6, v6 ; 7E0C4106 V_ADD_F32_e32 v6, -5.000000e-01, v6 ; 060C0CF1 V_CMP_GE_F32_e64 s[10:11], v6, 0.000000e+00, 0, 0 ; D00C000A 00010106 V_CNDMASK_B32_e64 v20, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D2000014 002A1AF3 V_XOR_B32_e32 v6, v6, v15 ; 3A0C1F06 V_CMP_GE_F32_e64 s[10:11], v6, 0.000000e+00, 0, 0 ; D00C000A 00010106 V_CNDMASK_B32_e64 v6, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000006 002900F2 V_ADD_F32_e32 v6, v20, v6 ; 060C0D14 V_MUL_F32_e32 v1, 9.302500e+02, v1 ; 100202FF 44689000 V_FRACT_F32_e32 v1, v1 ; 7E024101 V_ADD_F32_e32 v1, -5.000000e-01, v1 ; 060202F1 V_CMP_GE_F32_e64 s[10:11], v1, 0.000000e+00, 0, 0 ; D00C000A 00010101 V_CNDMASK_B32_e64 v20, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D2000014 002A1AF3 V_XOR_B32_e32 v1, v1, v15 ; 3A021F01 V_CMP_GE_F32_e64 s[10:11], v1, 0.000000e+00, 0, 0 ; D00C000A 00010101 V_CNDMASK_B32_e64 v1, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000001 002900F2 V_ADD_F32_e32 v1, v20, v1 ; 06020314 V_MUL_F32_e32 v20, v1, v4 ; 10280901 V_MAD_F32 v7, v7, v6, v20, 0, 0 ; D2820007 04520D07 V_MAD_F32 v0, v0, v10, v17, 0, 0 ; D2820000 04461500 V_MUL_F32_e32 v0, 1.300000e+01, v0 ; 100000FF 41500000 V_MUL_F32_e32 v0, v5, v0 ; 10000105 V_FRACT_F32_e32 v0, v0 ; 7E004100 V_MUL_F32_e32 v0, v0, v0 ; 10000100 V_MUL_F32_e32 v5, 1.860500e+03, v0 ; 100A00FF 44E89000 V_FRACT_F32_e32 v5, v5 ; 7E0A4105 V_ADD_F32_e32 v5, -5.000000e-01, v5 ; 060A0AF1 V_CMP_GE_F32_e64 s[10:11], v5, 0.000000e+00, 0, 0 ; D00C000A 00010105 V_CNDMASK_B32_e64 v10, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D200000A 002A1AF3 V_XOR_B32_e32 v5, v5, v15 ; 3A0A1F05 V_CMP_GE_F32_e64 s[10:11], v5, 0.000000e+00, 0, 0 ; D00C000A 00010105 V_CNDMASK_B32_e64 v5, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000005 002900F2 V_ADD_F32_e32 v5, v10, v5 ; 060A0B0A V_MUL_F32_e32 v0, 9.302500e+02, v0 ; 100000FF 44689000 V_FRACT_F32_e32 v0, v0 ; 7E004100 V_ADD_F32_e32 v0, -5.000000e-01, v0 ; 060000F1 V_CMP_GE_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D00C000A 00010100 V_CNDMASK_B32_e64 v10, -1.000000e+00, v13, s[10:11], 0, 0, 0, 0 ; D200000A 002A1AF3 V_XOR_B32_e32 v0, v0, v15 ; 3A001F00 V_CMP_GE_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D00C000A 00010100 V_CNDMASK_B32_e64 v0, 1.000000e+00, 0, s[10:11], 0, 0, 0, 0 ; D2000000 002900F2 V_ADD_F32_e32 v0, v10, v0 ; 0600010A V_MUL_F32_e32 v10, v0, v4 ; 10140900 V_MAD_F32 v10, v19, v5, v10, 0, 0 ; D282000A 042A0B13 V_SUB_F32_e32 v13, v10, v7 ; 081A0F0A V_MUL_F32_e32 v6, v23, v6 ; 100C0D17 V_MAD_F32 v5, v22, v5, v6, 0, 0 ; D2820005 041A0B16 V_MAD_F32 v5, v13, v25, v5, 0, 0 ; D2820005 0416330D V_MAD_F32 v6, v4, -2.000000e+00, v21, 0, 0 ; D2820006 0455EB04 V_MUL_F32_e32 v13, v4, v4 ; 101A0904 V_MUL_F32_e32 v6, v6, v13 ; 100C1B06 V_SUB_F32_e32 v13, 1.000000e+00, v6 ; 081A0CF2 V_MUL_F32_e32 v5, v13, v5 ; 100A0B0D V_MAD_F32 v5, v6, v12, v5, 0, 0 ; D2820005 04161906 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x4 ; C20A0104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s20, v5 ; 100A0A14 V_MAD_F32 v5, v5, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820005 03C1E105 V_RCP_F32_e32 v12, s9 ; 7E185409 V_SUB_F32_e32 v20, v8, v12 ; 08281908 V_RCP_F32_e32 v17, s8 ; 7E225408 V_ADD_F32_e32 v19, v17, v2 ; 06260511 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v21, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800100 00431513 V_SUB_F32_e32 v25, v2, v17 ; 08322302 V_MOV_B32_e32 v26, v20 ; 7E340314 IMAGE_SAMPLE v17, 1, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[12:19], s[8:11] ; F0800100 00431119 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v27, v17, v21 ; 08362B11 V_MOV_B32_e32 v26, v8 ; 7E340308 IMAGE_SAMPLE v28, 1, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[12:19], s[8:11] ; F0800100 00431C19 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v28, v28, v28 ; 0638391C V_ADD_F32_e32 v27, v27, v28 ; 0636391B V_ADD_F32_e32 v26, v12, v8 ; 0634110C V_MOV_B32_e32 v28, v19 ; 7E380313 V_MOV_B32_e32 v29, v20 ; 7E3A0314 V_MOV_B32_e32 v29, v26 ; 7E3A031A IMAGE_SAMPLE v12, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[12:19], s[8:11] ; F0800100 00430C1C IMAGE_SAMPLE v28, 1, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[12:19], s[8:11] ; F0800100 00431C19 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v29, v28, v12 ; 083A191C V_MOV_B32_e32 v30, v19 ; 7E3C0313 V_MOV_B32_e32 v31, v20 ; 7E3E0314 V_MOV_B32_e32 v31, v8 ; 7E3E0308 IMAGE_SAMPLE v8, 1, 0, 0, 0, 0, 0, 0, 0, v[30:31], s[12:19], s[8:11] ; F0800100 0043081E S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v8, v8, -2.000000e+00, v29, 0, 0 ; D2820008 0475EB08 V_ADD_F32_e32 v8, v8, v27 ; 06103708 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s0, v8 ; 10101000 V_MAD_F32 v27, v8, 5.000000e-01, 5.000000e-01, 0, 0 ; D282001B 03C1E108 V_SUB_F32_e32 v27, v27, v5 ; 08360B1B V_ADD_F32_e32 v17, v21, v17 ; 06222315 V_MOV_B32_e32 v3, v20 ; 7E060314 IMAGE_SAMPLE v19, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00431302 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v19, v19, v19 ; 06262713 V_ADD_F32_e32 v17, v17, v19 ; 06222711 V_XOR_B32_e32 v12, v12, v15 ; 3A181F0C V_SUB_F32_e32 v12, v12, v28 ; 0818390C V_MOV_B32_e32 v3, v26 ; 7E06031A IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00430202 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v2, -2.000000e+00, v12, 0, 0 ; D2820002 0431EB02 V_ADD_F32_e32 v2, v2, v17 ; 06042302 V_MUL_F32_e32 v2, s0, v2 ; 10040400 V_MUL_F32_e32 v3, v2, v2 ; 10060502 V_MAD_F32 v3, v8, v8, v3, 0, 0 ; D2820003 040E1108 V_MOV_B32_e32 v8, 1.000000e-07 ; 7E1002FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v3, v8 ; 7C0C1103 V_CMP_U_F32_e64 s[0:1], v3, v3, 0, 0 ; D0100000 00020703 V_CNDMASK_B32_e64 v8, 0, -1, vcc, 0, 0, 0, 0 ; D2000008 01A98280 V_CNDMASK_B32_e64 v12, 0, -1, s[0:1], 0, 0, 0, 0 ; D200000C 00018280 V_OR_B32_e32 v8, v8, v12 ; 38101908 V_MOV_B32_e32 v12, 0x33d6bf95 ; 7E1802FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v8, 0, 0, 0 ; D10A0000 00010108 V_CNDMASK_B32_e64 v3, v12, v3, s[0:1], 0, 0, 0, 0 ; D2000003 0002070C V_RSQ_CLAMP_F32_e32 v8, v3 ; 7E105903 V_MUL_F32_e32 v8, v8, v3 ; 10100708 V_XOR_B32_e32 v3, v3, v15 ; 3A061F03 V_CMP_GT_F32_e32 vcc, 0, v3 ; 7C080680 V_CNDMASK_B32_e64 v3, 0.000000e+00, v8, vcc, 0, 0, 0, 0 ; D2000003 01AA1080 V_ADD_F32_e64 v8, v3, 0, 1, 0 ; D2060808 00010103 V_MAD_F32 v5, v8, v27, v5, 0, 0 ; D2820005 04163708 V_SUB_F32_e32 v7, v16, v7 ; 080E0F10 V_MUL_F32_e32 v1, v13, v1 ; 1002030D V_MAD_F32 v1, v6, v11, v1, 0, 0 ; D2820001 04061706 V_MAD_F32 v11, v4, v24, v18, 0, 0 ; D282000B 044A3104 V_MUL_F32_e32 v4, v4, v11 ; 10081704 V_MAD_F32 v1, v7, v4, v1, 0, 0 ; D2820001 04060907 V_MUL_F32_e32 v1, v23, v1 ; 10020317 V_SUB_F32_e32 v7, v10, v14 ; 080E1D0A V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MUL_F32_e32 v0, v13, v0 ; 1000010D V_MAD_F32 v0, v6, v9, v0, 0, 0 ; D2820000 04021306 V_SUB_F32_e32 v0, v4, v0 ; 08000104 V_MUL_F32_e32 v0, v22, v0 ; 10000116 V_SUB_F32_e32 v0, v0, v1 ; 08000300 V_MUL_F32_e32 v0, s20, v0 ; 10000014 V_MAD_F32 v0, v0, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820000 03C1E100 V_MUL_F32_e32 v1, 5.000000e-01, v2 ; 100204F0 V_SUB_F32_e32 v1, 5.000000e-01, v1 ; 080202F0 V_SUB_F32_e32 v1, v1, v0 ; 08020101 V_MAD_F32 v0, v8, v1, v0, 0, 0 ; D2820000 04020308 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v5 ; 5E000B00 V_ADD_F32_e32 v1, -1.000000e+00, v3 ; 060206F3 V_MAD_F32 v1, v8, v1, 1.000000e+00, 0, 0 ; D2820001 03CA0308 V_CVT_PKRTZ_F16_F32_e32 v1, 0.000000e+00, v1 ; 5E020280 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %23) %31 = bitcast i32 %30 to float %32 = call i32 @llvm.SI.packf16(float %24, float %25) %33 = bitcast i32 %32 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %31, float %33, float %31, float %33) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_INTERP_MOV_F32 v1, P0, 1, 0, [m0] ; C8060102 V_INTERP_MOV_F32 v2, P0, 0, 0, [m0] ; C80A0002 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 EXP 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL OUT[8], GENERIC[16] DCL CONST[0..96] DCL TEMP[0..9], LOCAL DCL ADDR[0] IMM[0] FLT32 { -1.0000, -2.0000, 0.6600, 0.3300} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MOV TEMP[1].xy, IMM[0].xyxx 3: F2I TEMP[2].x, IN[4].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: ADD TEMP[2].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 6: MOV TEMP[1].zw, TEMP[2].wwzw 7: ABS TEMP[2].z, TEMP[1] 8: ABS TEMP[3].z, TEMP[1] 9: FSGE TEMP[2].x, -TEMP[2].zzzz, TEMP[3].zzzz 10: UIF TEMP[2].xxxx :0 11: F2I TEMP[2].x, IN[4].xxxx 12: UARL ADDR[0].x, TEMP[2].xxxx 13: MUL TEMP[2].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 14: MOV TEMP[2].xy, TEMP[2].xyxx 15: MUL TEMP[3].xy, TEMP[2], IMM[0].zzzz 16: MOV TEMP[2].xy, TEMP[3].xyxx 17: ELSE :0 18: F2I TEMP[3].x, IN[4].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: MUL TEMP[3].xy, IN[1], CONST[ADDR[0].x+1].xxxx 21: MOV TEMP[3].xy, TEMP[3].xyxx 22: MUL TEMP[4].xy, TEMP[3], IMM[0].wwww 23: MOV TEMP[2].xy, TEMP[4].xyxx 24: ENDIF 25: ABS TEMP[4].w, TEMP[1] 26: ABS TEMP[5].w, TEMP[1] 27: FSGE TEMP[4].x, -TEMP[4].wwww, TEMP[5].wwww 28: UIF TEMP[4].xxxx :0 29: F2I TEMP[4].x, IN[4].xxxx 30: UARL ADDR[0].x, TEMP[4].xxxx 31: MUL TEMP[4].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 32: MUL TEMP[4].xy, TEMP[4].zwzw, IMM[0].zzzz 33: MOV TEMP[2].xy, TEMP[4].xyxx 34: ENDIF 35: F2I TEMP[4].x, IN[4].yyyy 36: UARL ADDR[0].x, TEMP[4].xxxx 37: ADD TEMP[4].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 38: MOV TEMP[1].zw, TEMP[4].wwzw 39: ABS TEMP[4].z, TEMP[1] 40: ABS TEMP[5].z, TEMP[1] 41: FSGE TEMP[4].x, -TEMP[4].zzzz, TEMP[5].zzzz 42: UIF TEMP[4].xxxx :0 43: F2I TEMP[4].x, IN[4].yyyy 44: UARL ADDR[0].x, TEMP[4].xxxx 45: MUL TEMP[4].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 46: MOV TEMP[3].xy, TEMP[4].xyxx 47: MUL TEMP[4].xy, TEMP[3], IMM[0].zzzz 48: MOV TEMP[3].xy, TEMP[4].xyxx 49: ELSE :0 50: F2I TEMP[4].x, IN[4].yyyy 51: UARL ADDR[0].x, TEMP[4].xxxx 52: MUL TEMP[4].xy, IN[1], CONST[ADDR[0].x+1].xxxx 53: MOV TEMP[4].xy, TEMP[4].xyxx 54: MUL TEMP[5].xy, TEMP[4], IMM[0].wwww 55: MOV TEMP[3].xy, TEMP[5].xyxx 56: ENDIF 57: ABS TEMP[5].w, TEMP[1] 58: ABS TEMP[6].w, TEMP[1] 59: FSGE TEMP[5].x, -TEMP[5].wwww, TEMP[6].wwww 60: UIF TEMP[5].xxxx :0 61: F2I TEMP[5].x, IN[4].yyyy 62: UARL ADDR[0].x, TEMP[5].xxxx 63: MUL TEMP[5].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 64: MUL TEMP[5].xy, TEMP[5].zwzw, IMM[0].zzzz 65: MOV TEMP[3].xy, TEMP[5].xyxx 66: ENDIF 67: F2I TEMP[5].x, IN[4].zzzz 68: UARL ADDR[0].x, TEMP[5].xxxx 69: ADD TEMP[5].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 70: MOV TEMP[1].zw, TEMP[5].wwzw 71: ABS TEMP[5].z, TEMP[1] 72: ABS TEMP[6].z, TEMP[1] 73: FSGE TEMP[5].x, -TEMP[5].zzzz, TEMP[6].zzzz 74: UIF TEMP[5].xxxx :0 75: F2I TEMP[5].x, IN[4].zzzz 76: UARL ADDR[0].x, TEMP[5].xxxx 77: MUL TEMP[5].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 78: MOV TEMP[4].xy, TEMP[5].xyxx 79: MUL TEMP[5].xy, TEMP[4], IMM[0].zzzz 80: MOV TEMP[4].xy, TEMP[5].xyxx 81: ELSE :0 82: F2I TEMP[5].x, IN[4].zzzz 83: UARL ADDR[0].x, TEMP[5].xxxx 84: MUL TEMP[5].xy, IN[1], CONST[ADDR[0].x+1].xxxx 85: MOV TEMP[5].xy, TEMP[5].xyxx 86: MUL TEMP[6].xy, TEMP[5], IMM[0].wwww 87: MOV TEMP[4].xy, TEMP[6].xyxx 88: ENDIF 89: ABS TEMP[6].w, TEMP[1] 90: ABS TEMP[7].w, TEMP[1] 91: FSGE TEMP[6].x, -TEMP[6].wwww, TEMP[7].wwww 92: UIF TEMP[6].xxxx :0 93: F2I TEMP[6].x, IN[4].zzzz 94: UARL ADDR[0].x, TEMP[6].xxxx 95: MUL TEMP[6].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 96: MUL TEMP[6].xy, TEMP[6].zwzw, IMM[0].zzzz 97: MOV TEMP[4].xy, TEMP[6].xyxx 98: ENDIF 99: F2I TEMP[6].x, IN[4].wwww 100: UARL ADDR[0].x, TEMP[6].xxxx 101: ADD TEMP[6].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 102: MOV TEMP[1].zw, TEMP[6].wwzw 103: ABS TEMP[6].z, TEMP[1] 104: ABS TEMP[7].z, TEMP[1] 105: FSGE TEMP[6].x, -TEMP[6].zzzz, TEMP[7].zzzz 106: UIF TEMP[6].xxxx :0 107: F2I TEMP[6].x, IN[4].wwww 108: UARL ADDR[0].x, TEMP[6].xxxx 109: MUL TEMP[6].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 110: MOV TEMP[5].xy, TEMP[6].xyxx 111: MUL TEMP[6].xy, TEMP[5], IMM[0].zzzz 112: MOV TEMP[5].xy, TEMP[6].xyxx 113: ELSE :0 114: F2I TEMP[6].x, IN[4].wwww 115: UARL ADDR[0].x, TEMP[6].xxxx 116: MUL TEMP[6].xy, IN[1], CONST[ADDR[0].x+1].xxxx 117: MOV TEMP[6].xy, TEMP[6].xyxx 118: MUL TEMP[7].xy, TEMP[6], IMM[0].wwww 119: MOV TEMP[5].xy, TEMP[7].xyxx 120: ENDIF 121: ABS TEMP[7].w, TEMP[1] 122: ABS TEMP[8].w, TEMP[1] 123: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 124: UIF TEMP[7].xxxx :0 125: F2I TEMP[7].x, IN[4].wwww 126: UARL ADDR[0].x, TEMP[7].xxxx 127: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 128: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 129: MOV TEMP[5].xy, TEMP[7].xyxx 130: ENDIF 131: F2I TEMP[7].x, IN[5].xxxx 132: UARL ADDR[0].x, TEMP[7].xxxx 133: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 134: MOV TEMP[1].zw, TEMP[7].wwzw 135: ABS TEMP[7].z, TEMP[1] 136: ABS TEMP[8].z, TEMP[1] 137: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 138: UIF TEMP[7].xxxx :0 139: F2I TEMP[7].x, IN[5].xxxx 140: UARL ADDR[0].x, TEMP[7].xxxx 141: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 142: MOV TEMP[6].xy, TEMP[7].xyxx 143: MUL TEMP[7].xy, TEMP[6], IMM[0].zzzz 144: MOV TEMP[6].xy, TEMP[7].xyxx 145: ELSE :0 146: F2I TEMP[7].x, IN[5].xxxx 147: UARL ADDR[0].x, TEMP[7].xxxx 148: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 149: MOV TEMP[6].zw, TEMP[7].wwzw 150: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 151: MOV TEMP[6].xy, TEMP[7].xyxx 152: ENDIF 153: ABS TEMP[7].w, TEMP[1] 154: ABS TEMP[8].w, TEMP[1] 155: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 156: UIF TEMP[7].xxxx :0 157: F2I TEMP[7].x, IN[5].xxxx 158: UARL ADDR[0].x, TEMP[7].xxxx 159: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 160: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 161: MOV TEMP[6].xy, TEMP[7].xyxx 162: ENDIF 163: F2I TEMP[7].x, IN[5].yyyy 164: UARL ADDR[0].x, TEMP[7].xxxx 165: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 166: MOV TEMP[1].zw, TEMP[7].wwzw 167: ABS TEMP[7].z, TEMP[1] 168: ABS TEMP[8].z, TEMP[1] 169: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 170: UIF TEMP[7].xxxx :0 171: F2I TEMP[7].x, IN[5].yyyy 172: UARL ADDR[0].x, TEMP[7].xxxx 173: MUL TEMP[7].zw, IN[1].xyxz, CONST[ADDR[0].x+1].xxxx 174: MOV TEMP[6].zw, TEMP[7].wwzw 175: MUL TEMP[7].zw, TEMP[6], IMM[0].zzzz 176: MOV TEMP[6].zw, TEMP[7].wwzw 177: ELSE :0 178: F2I TEMP[7].x, IN[5].yyyy 179: UARL ADDR[0].x, TEMP[7].xxxx 180: MUL TEMP[7].xy, IN[1], CONST[ADDR[0].x+1].xxxx 181: MOV TEMP[8].xy, TEMP[7].xyxx 182: MUL TEMP[7].zw, TEMP[7].xyxy, IMM[0].wwww 183: MOV TEMP[6].zw, TEMP[7].wwzw 184: ENDIF 185: ABS TEMP[7].w, TEMP[1] 186: ABS TEMP[9].w, TEMP[1] 187: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 188: UIF TEMP[7].xxxx :0 189: F2I TEMP[7].x, IN[5].yyyy 190: UARL ADDR[0].x, TEMP[7].xxxx 191: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 192: MOV TEMP[1].zw, TEMP[7].wwzw 193: MUL TEMP[7].zw, TEMP[1], IMM[0].zzzz 194: MOV TEMP[6].zw, TEMP[7].wwzw 195: ENDIF 196: F2I TEMP[7].x, IN[5].zzzz 197: UARL ADDR[0].x, TEMP[7].xxxx 198: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 199: MOV TEMP[1].zw, TEMP[7].wwzw 200: ABS TEMP[7].z, TEMP[1] 201: ABS TEMP[9].z, TEMP[1] 202: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[9].zzzz 203: UIF TEMP[7].xxxx :0 204: F2I TEMP[7].x, IN[5].zzzz 205: UARL ADDR[0].x, TEMP[7].xxxx 206: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 207: MOV TEMP[8].xy, TEMP[7].xyxx 208: MUL TEMP[7].xy, TEMP[8], IMM[0].zzzz 209: MOV TEMP[8].xy, TEMP[7].xyxx 210: ELSE :0 211: F2I TEMP[7].x, IN[5].zzzz 212: UARL ADDR[0].x, TEMP[7].xxxx 213: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 214: MOV TEMP[8].zw, TEMP[7].wwzw 215: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 216: MOV TEMP[8].xy, TEMP[7].xyxx 217: ENDIF 218: ABS TEMP[7].w, TEMP[1] 219: ABS TEMP[9].w, TEMP[1] 220: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 221: UIF TEMP[7].xxxx :0 222: F2I TEMP[7].x, IN[5].zzzz 223: UARL ADDR[0].x, TEMP[7].xxxx 224: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 225: MOV TEMP[1].zw, TEMP[7].wwzw 226: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 227: MOV TEMP[8].xy, TEMP[7].xyxx 228: ENDIF 229: F2I TEMP[7].x, IN[5].wwww 230: UARL ADDR[0].x, TEMP[7].xxxx 231: ADD TEMP[7].xy, TEMP[1], CONST[ADDR[0].x+1].yyyy 232: MOV TEMP[1].xy, TEMP[7].xyxx 233: ABS TEMP[7].x, TEMP[1] 234: ABS TEMP[9].x, TEMP[1] 235: FSGE TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx 236: UIF TEMP[7].xxxx :0 237: F2I TEMP[7].x, IN[5].wwww 238: UARL ADDR[0].x, TEMP[7].xxxx 239: MUL TEMP[7].xz, IN[1], CONST[ADDR[0].x+1].xxxx 240: MOV TEMP[1].xz, TEMP[7].xxzx 241: MUL TEMP[7].xz, TEMP[1], IMM[0].zzzz 242: MOV TEMP[1].xz, TEMP[7].xxzx 243: ELSE :0 244: F2I TEMP[7].x, IN[5].wwww 245: UARL ADDR[0].x, TEMP[7].xxxx 246: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 247: MOV TEMP[8].zw, TEMP[7].wwzw 248: MUL TEMP[7].xz, TEMP[8].zyww, IMM[0].wwww 249: MOV TEMP[1].xz, TEMP[7].xxzx 250: ENDIF 251: ABS TEMP[7].y, TEMP[1] 252: ABS TEMP[9].y, TEMP[1] 253: FSGE TEMP[7].x, -TEMP[7].yyyy, TEMP[9].yyyy 254: UIF TEMP[7].xxxx :0 255: F2I TEMP[7].x, IN[5].wwww 256: UARL ADDR[0].x, TEMP[7].xxxx 257: MUL TEMP[7].yw, IN[1].xyzz, CONST[ADDR[0].x+1].xxxx 258: MUL TEMP[7].xz, TEMP[7].yyww, IMM[0].zzzz 259: MOV TEMP[1].xz, TEMP[7].xxzx 260: ENDIF 261: MAD TEMP[7].zw, IN[0].zzzz, IMM[1].xyxy, IMM[1].xyyx 262: MOV TEMP[0].zw, TEMP[7].wwzw 263: MOV TEMP[2].zw, IN[2].yyxy 264: MOV TEMP[3].zw, IN[2].wwzw 265: MOV TEMP[4].zw, IN[3].yyxy 266: MOV TEMP[5].zw, IN[3].wwzw 267: MOV TEMP[7].xy, TEMP[6].xyxx 268: MOV TEMP[7].zw, IMM[1].yyyy 269: MOV TEMP[6].xy, TEMP[6].zwzz 270: MOV TEMP[6].zw, IMM[1].yyyy 271: MOV TEMP[8].xy, TEMP[8].xyxx 272: MOV TEMP[8].zw, IMM[1].yyyy 273: MOV TEMP[1].xy, TEMP[1].xzxx 274: MOV TEMP[1].zw, IMM[1].yyyy 275: MOV OUT[6], TEMP[6] 276: MOV OUT[7], TEMP[8] 277: MOV OUT[8], TEMP[1] 278: MOV OUT[1], TEMP[2] 279: MOV OUT[2], TEMP[3] 280: MOV OUT[0], TEMP[0] 281: MOV OUT[3], TEMP[4] 282: MOV OUT[4], TEMP[5] 283: MOV OUT[5], TEMP[7] 284: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fadd float %13, %19 %62 = fadd float %14, %20 %63 = fptosi float %49 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = shl i32 %65, 4 %67 = add i32 %66, 20 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = fadd float -1.000000e+00, %68 %70 = shl i32 %65, 4 %71 = add i32 %70, 20 %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %71) %73 = fadd float -2.000000e+00, %72 %74 = call float @fabs(float %69) %75 = call float @fabs(float %69) %76 = fsub float -0.000000e+00, %74 %77 = fcmp oge float %76, %75 %78 = sext i1 %77 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = icmp ne i32 %80, 0 %82 = fptosi float %49 to i32 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = shl i32 %84, 4 %86 = add i32 %85, 16 %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %86) %88 = fmul float %26, %87 %89 = shl i32 %84, 4 %90 = add i32 %89, 16 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) br i1 %81, label %IF, label %ELSE IF: ; preds = %main_body %92 = fmul float %28, %91 %93 = fmul float %88, 0x3FE51EB860000000 %94 = fmul float %92, 0x3FE51EB860000000 br label %ENDIF ELSE: ; preds = %main_body %95 = fmul float %27, %91 %96 = fmul float %88, 0x3FD51EB860000000 %97 = fmul float %95, 0x3FD51EB860000000 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp8.0 = phi float [ %93, %IF ], [ %96, %ELSE ] %temp9.0 = phi float [ %94, %IF ], [ %97, %ELSE ] %98 = call float @fabs(float %73) %99 = call float @fabs(float %73) %100 = fsub float -0.000000e+00, %98 %101 = fcmp oge float %100, %99 %102 = sext i1 %101 to i32 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = icmp ne i32 %104, 0 br i1 %105, label %IF46, label %ENDIF45 IF46: ; preds = %ENDIF %106 = fptosi float %49 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = add i32 %109, 16 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = fmul float %27, %111 %113 = shl i32 %108, 4 %114 = add i32 %113, 16 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %28, %115 %117 = fmul float %112, 0x3FE51EB860000000 %118 = fmul float %116, 0x3FE51EB860000000 br label %ENDIF45 ENDIF45: ; preds = %ENDIF, %IF46 %temp8.1 = phi float [ %117, %IF46 ], [ %temp8.0, %ENDIF ] %temp9.1 = phi float [ %118, %IF46 ], [ %temp9.0, %ENDIF ] %119 = fptosi float %50 to i32 %120 = bitcast i32 %119 to float %121 = bitcast float %120 to i32 %122 = shl i32 %121, 4 %123 = add i32 %122, 20 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float -1.000000e+00, %124 %126 = shl i32 %121, 4 %127 = add i32 %126, 20 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float -2.000000e+00, %128 %130 = call float @fabs(float %125) %131 = call float @fabs(float %125) %132 = fsub float -0.000000e+00, %130 %133 = fcmp oge float %132, %131 %134 = sext i1 %133 to i32 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = icmp ne i32 %136, 0 %138 = fptosi float %50 to i32 %139 = bitcast i32 %138 to float %140 = bitcast float %139 to i32 %141 = shl i32 %140, 4 %142 = add i32 %141, 16 %143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %142) %144 = fmul float %26, %143 %145 = shl i32 %140, 4 %146 = add i32 %145, 16 %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %146) br i1 %137, label %IF53, label %ELSE54 IF53: ; preds = %ENDIF45 %148 = fmul float %28, %147 %149 = fmul float %144, 0x3FE51EB860000000 %150 = fmul float %148, 0x3FE51EB860000000 br label %ENDIF52 ELSE54: ; preds = %ENDIF45 %151 = fmul float %27, %147 %152 = fmul float %144, 0x3FD51EB860000000 %153 = fmul float %151, 0x3FD51EB860000000 br label %ENDIF52 ENDIF52: ; preds = %ELSE54, %IF53 %temp12.0 = phi float [ %149, %IF53 ], [ %152, %ELSE54 ] %temp13.0 = phi float [ %150, %IF53 ], [ %153, %ELSE54 ] %154 = call float @fabs(float %129) %155 = call float @fabs(float %129) %156 = fsub float -0.000000e+00, %154 %157 = fcmp oge float %156, %155 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 br i1 %161, label %IF60, label %ENDIF59 IF60: ; preds = %ENDIF52 %162 = fptosi float %50 to i32 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = shl i32 %164, 4 %166 = add i32 %165, 16 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = fmul float %27, %167 %169 = shl i32 %164, 4 %170 = add i32 %169, 16 %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %170) %172 = fmul float %28, %171 %173 = fmul float %168, 0x3FE51EB860000000 %174 = fmul float %172, 0x3FE51EB860000000 br label %ENDIF59 ENDIF59: ; preds = %ENDIF52, %IF60 %temp12.1 = phi float [ %173, %IF60 ], [ %temp12.0, %ENDIF52 ] %temp13.1 = phi float [ %174, %IF60 ], [ %temp13.0, %ENDIF52 ] %175 = fptosi float %51 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = shl i32 %177, 4 %179 = add i32 %178, 20 %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %179) %181 = fadd float -1.000000e+00, %180 %182 = shl i32 %177, 4 %183 = add i32 %182, 20 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fadd float -2.000000e+00, %184 %186 = call float @fabs(float %181) %187 = call float @fabs(float %181) %188 = fsub float -0.000000e+00, %186 %189 = fcmp oge float %188, %187 %190 = sext i1 %189 to i32 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = icmp ne i32 %192, 0 %194 = fptosi float %51 to i32 %195 = bitcast i32 %194 to float %196 = bitcast float %195 to i32 %197 = shl i32 %196, 4 %198 = add i32 %197, 16 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fmul float %26, %199 %201 = shl i32 %196, 4 %202 = add i32 %201, 16 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) br i1 %193, label %IF67, label %ELSE68 IF67: ; preds = %ENDIF59 %204 = fmul float %28, %203 %205 = fmul float %200, 0x3FE51EB860000000 %206 = fmul float %204, 0x3FE51EB860000000 br label %ENDIF66 ELSE68: ; preds = %ENDIF59 %207 = fmul float %27, %203 %208 = fmul float %200, 0x3FD51EB860000000 %209 = fmul float %207, 0x3FD51EB860000000 br label %ENDIF66 ENDIF66: ; preds = %ELSE68, %IF67 %temp16.0 = phi float [ %205, %IF67 ], [ %208, %ELSE68 ] %temp17.0 = phi float [ %206, %IF67 ], [ %209, %ELSE68 ] %210 = call float @fabs(float %185) %211 = call float @fabs(float %185) %212 = fsub float -0.000000e+00, %210 %213 = fcmp oge float %212, %211 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 br i1 %217, label %IF74, label %ENDIF73 IF74: ; preds = %ENDIF66 %218 = fptosi float %51 to i32 %219 = bitcast i32 %218 to float %220 = bitcast float %219 to i32 %221 = shl i32 %220, 4 %222 = add i32 %221, 16 %223 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %222) %224 = fmul float %27, %223 %225 = shl i32 %220, 4 %226 = add i32 %225, 16 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = fmul float %28, %227 %229 = fmul float %224, 0x3FE51EB860000000 %230 = fmul float %228, 0x3FE51EB860000000 br label %ENDIF73 ENDIF73: ; preds = %ENDIF66, %IF74 %temp16.1 = phi float [ %229, %IF74 ], [ %temp16.0, %ENDIF66 ] %temp17.1 = phi float [ %230, %IF74 ], [ %temp17.0, %ENDIF66 ] %231 = fptosi float %52 to i32 %232 = bitcast i32 %231 to float %233 = bitcast float %232 to i32 %234 = shl i32 %233, 4 %235 = add i32 %234, 20 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = fadd float -1.000000e+00, %236 %238 = shl i32 %233, 4 %239 = add i32 %238, 20 %240 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %239) %241 = fadd float -2.000000e+00, %240 %242 = call float @fabs(float %237) %243 = call float @fabs(float %237) %244 = fsub float -0.000000e+00, %242 %245 = fcmp oge float %244, %243 %246 = sext i1 %245 to i32 %247 = bitcast i32 %246 to float %248 = bitcast float %247 to i32 %249 = icmp ne i32 %248, 0 %250 = fptosi float %52 to i32 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 16 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %26, %255 %257 = shl i32 %252, 4 %258 = add i32 %257, 16 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) br i1 %249, label %IF81, label %ELSE82 IF81: ; preds = %ENDIF73 %260 = fmul float %28, %259 %261 = fmul float %256, 0x3FE51EB860000000 %262 = fmul float %260, 0x3FE51EB860000000 br label %ENDIF80 ELSE82: ; preds = %ENDIF73 %263 = fmul float %27, %259 %264 = fmul float %256, 0x3FD51EB860000000 %265 = fmul float %263, 0x3FD51EB860000000 br label %ENDIF80 ENDIF80: ; preds = %ELSE82, %IF81 %temp20.0 = phi float [ %261, %IF81 ], [ %264, %ELSE82 ] %temp21.0 = phi float [ %262, %IF81 ], [ %265, %ELSE82 ] %266 = call float @fabs(float %241) %267 = call float @fabs(float %241) %268 = fsub float -0.000000e+00, %266 %269 = fcmp oge float %268, %267 %270 = sext i1 %269 to i32 %271 = bitcast i32 %270 to float %272 = bitcast float %271 to i32 %273 = icmp ne i32 %272, 0 br i1 %273, label %IF88, label %ENDIF87 IF88: ; preds = %ENDIF80 %274 = fptosi float %52 to i32 %275 = bitcast i32 %274 to float %276 = bitcast float %275 to i32 %277 = shl i32 %276, 4 %278 = add i32 %277, 16 %279 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %278) %280 = fmul float %27, %279 %281 = shl i32 %276, 4 %282 = add i32 %281, 16 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = fmul float %28, %283 %285 = fmul float %280, 0x3FE51EB860000000 %286 = fmul float %284, 0x3FE51EB860000000 br label %ENDIF87 ENDIF87: ; preds = %ENDIF80, %IF88 %temp20.1 = phi float [ %285, %IF88 ], [ %temp20.0, %ENDIF80 ] %temp21.1 = phi float [ %286, %IF88 ], [ %temp21.0, %ENDIF80 ] %287 = fptosi float %57 to i32 %288 = bitcast i32 %287 to float %289 = bitcast float %288 to i32 %290 = shl i32 %289, 4 %291 = add i32 %290, 20 %292 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %291) %293 = fadd float -1.000000e+00, %292 %294 = shl i32 %289, 4 %295 = add i32 %294, 20 %296 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %295) %297 = fadd float -2.000000e+00, %296 %298 = call float @fabs(float %293) %299 = call float @fabs(float %293) %300 = fsub float -0.000000e+00, %298 %301 = fcmp oge float %300, %299 %302 = sext i1 %301 to i32 %303 = bitcast i32 %302 to float %304 = bitcast float %303 to i32 %305 = icmp ne i32 %304, 0 %306 = fptosi float %57 to i32 %307 = bitcast i32 %306 to float %308 = bitcast float %307 to i32 %309 = shl i32 %308, 4 %310 = add i32 %309, 16 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = fmul float %26, %311 %313 = shl i32 %308, 4 %314 = add i32 %313, 16 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) br i1 %305, label %IF95, label %ELSE96 IF95: ; preds = %ENDIF87 %316 = fmul float %28, %315 %317 = fmul float %312, 0x3FE51EB860000000 %318 = fmul float %316, 0x3FE51EB860000000 br label %ENDIF94 ELSE96: ; preds = %ENDIF87 %319 = fmul float %27, %315 %320 = fmul float %312, 0x3FD51EB860000000 %321 = fmul float %319, 0x3FD51EB860000000 br label %ENDIF94 ENDIF94: ; preds = %ELSE96, %IF95 %temp24.0 = phi float [ %317, %IF95 ], [ %320, %ELSE96 ] %temp25.0 = phi float [ %318, %IF95 ], [ %321, %ELSE96 ] %322 = call float @fabs(float %297) %323 = call float @fabs(float %297) %324 = fsub float -0.000000e+00, %322 %325 = fcmp oge float %324, %323 %326 = sext i1 %325 to i32 %327 = bitcast i32 %326 to float %328 = bitcast float %327 to i32 %329 = icmp ne i32 %328, 0 br i1 %329, label %IF102, label %ENDIF101 IF102: ; preds = %ENDIF94 %330 = fptosi float %57 to i32 %331 = bitcast i32 %330 to float %332 = bitcast float %331 to i32 %333 = shl i32 %332, 4 %334 = add i32 %333, 16 %335 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %334) %336 = fmul float %27, %335 %337 = shl i32 %332, 4 %338 = add i32 %337, 16 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = fmul float %28, %339 %341 = fmul float %336, 0x3FE51EB860000000 %342 = fmul float %340, 0x3FE51EB860000000 br label %ENDIF101 ENDIF101: ; preds = %ENDIF94, %IF102 %temp24.1 = phi float [ %341, %IF102 ], [ %temp24.0, %ENDIF94 ] %temp25.1 = phi float [ %342, %IF102 ], [ %temp25.0, %ENDIF94 ] %343 = fptosi float %58 to i32 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 20 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = fadd float -1.000000e+00, %348 %350 = shl i32 %345, 4 %351 = add i32 %350, 20 %352 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %351) %353 = fadd float -2.000000e+00, %352 %354 = call float @fabs(float %349) %355 = call float @fabs(float %349) %356 = fsub float -0.000000e+00, %354 %357 = fcmp oge float %356, %355 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 %362 = fptosi float %58 to i32 %363 = bitcast i32 %362 to float %364 = bitcast float %363 to i32 %365 = shl i32 %364, 4 %366 = add i32 %365, 16 %367 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %366) %368 = fmul float %26, %367 %369 = shl i32 %364, 4 %370 = add i32 %369, 16 %371 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %370) br i1 %361, label %IF109, label %ELSE110 IF109: ; preds = %ENDIF101 %372 = fmul float %28, %371 %373 = fmul float %368, 0x3FE51EB860000000 %374 = fmul float %372, 0x3FE51EB860000000 br label %ENDIF108 ELSE110: ; preds = %ENDIF101 %375 = fmul float %27, %371 %376 = fmul float %368, 0x3FD51EB860000000 %377 = fmul float %375, 0x3FD51EB860000000 br label %ENDIF108 ENDIF108: ; preds = %ELSE110, %IF109 %temp26.0 = phi float [ %373, %IF109 ], [ %376, %ELSE110 ] %temp27.0 = phi float [ %374, %IF109 ], [ %377, %ELSE110 ] %378 = call float @fabs(float %353) %379 = call float @fabs(float %353) %380 = fsub float -0.000000e+00, %378 %381 = fcmp oge float %380, %379 %382 = sext i1 %381 to i32 %383 = bitcast i32 %382 to float %384 = bitcast float %383 to i32 %385 = icmp ne i32 %384, 0 br i1 %385, label %IF116, label %ENDIF115 IF116: ; preds = %ENDIF108 %386 = fptosi float %58 to i32 %387 = bitcast i32 %386 to float %388 = bitcast float %387 to i32 %389 = shl i32 %388, 4 %390 = add i32 %389, 16 %391 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %390) %392 = fmul float %27, %391 %393 = shl i32 %388, 4 %394 = add i32 %393, 16 %395 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %394) %396 = fmul float %28, %395 %397 = fmul float %392, 0x3FE51EB860000000 %398 = fmul float %396, 0x3FE51EB860000000 br label %ENDIF115 ENDIF115: ; preds = %ENDIF108, %IF116 %temp26.1 = phi float [ %397, %IF116 ], [ %temp26.0, %ENDIF108 ] %temp27.1 = phi float [ %398, %IF116 ], [ %temp27.0, %ENDIF108 ] %399 = fptosi float %59 to i32 %400 = bitcast i32 %399 to float %401 = bitcast float %400 to i32 %402 = shl i32 %401, 4 %403 = add i32 %402, 20 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = fadd float -1.000000e+00, %404 %406 = shl i32 %401, 4 %407 = add i32 %406, 20 %408 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %407) %409 = fadd float -2.000000e+00, %408 %410 = call float @fabs(float %405) %411 = call float @fabs(float %405) %412 = fsub float -0.000000e+00, %410 %413 = fcmp oge float %412, %411 %414 = sext i1 %413 to i32 %415 = bitcast i32 %414 to float %416 = bitcast float %415 to i32 %417 = icmp ne i32 %416, 0 %418 = fptosi float %59 to i32 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 16 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = fmul float %26, %423 %425 = shl i32 %420, 4 %426 = add i32 %425, 16 %427 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %426) br i1 %417, label %IF123, label %ELSE124 IF123: ; preds = %ENDIF115 %428 = fmul float %28, %427 %429 = fmul float %424, 0x3FE51EB860000000 %430 = fmul float %428, 0x3FE51EB860000000 br label %ENDIF122 ELSE124: ; preds = %ENDIF115 %431 = fmul float %27, %427 %432 = fmul float %424, 0x3FD51EB860000000 %433 = fmul float %431, 0x3FD51EB860000000 br label %ENDIF122 ENDIF122: ; preds = %ELSE124, %IF123 %temp32.0 = phi float [ %429, %IF123 ], [ %432, %ELSE124 ] %temp33.0 = phi float [ %430, %IF123 ], [ %433, %ELSE124 ] %434 = call float @fabs(float %409) %435 = call float @fabs(float %409) %436 = fsub float -0.000000e+00, %434 %437 = fcmp oge float %436, %435 %438 = sext i1 %437 to i32 %439 = bitcast i32 %438 to float %440 = bitcast float %439 to i32 %441 = icmp ne i32 %440, 0 br i1 %441, label %IF130, label %ENDIF129 IF130: ; preds = %ENDIF122 %442 = fptosi float %59 to i32 %443 = bitcast i32 %442 to float %444 = bitcast float %443 to i32 %445 = shl i32 %444, 4 %446 = add i32 %445, 16 %447 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %446) %448 = fmul float %27, %447 %449 = shl i32 %444, 4 %450 = add i32 %449, 16 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = fmul float %28, %451 %453 = fmul float %448, 0x3FE51EB860000000 %454 = fmul float %452, 0x3FE51EB860000000 br label %ENDIF129 ENDIF129: ; preds = %ENDIF122, %IF130 %temp32.1 = phi float [ %453, %IF130 ], [ %temp32.0, %ENDIF122 ] %temp33.1 = phi float [ %454, %IF130 ], [ %temp33.0, %ENDIF122 ] %455 = fptosi float %60 to i32 %456 = bitcast i32 %455 to float %457 = bitcast float %456 to i32 %458 = shl i32 %457, 4 %459 = add i32 %458, 20 %460 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %459) %461 = fadd float -1.000000e+00, %460 %462 = shl i32 %457, 4 %463 = add i32 %462, 20 %464 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %463) %465 = fadd float -2.000000e+00, %464 %466 = call float @fabs(float %461) %467 = call float @fabs(float %461) %468 = fsub float -0.000000e+00, %466 %469 = fcmp oge float %468, %467 %470 = sext i1 %469 to i32 %471 = bitcast i32 %470 to float %472 = bitcast float %471 to i32 %473 = icmp ne i32 %472, 0 %474 = fptosi float %60 to i32 %475 = bitcast i32 %474 to float %476 = bitcast float %475 to i32 %477 = shl i32 %476, 4 %478 = add i32 %477, 16 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = fmul float %26, %479 %481 = shl i32 %476, 4 %482 = add i32 %481, 16 %483 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %482) br i1 %473, label %IF137, label %ELSE138 IF137: ; preds = %ENDIF129 %484 = fmul float %28, %483 %485 = fmul float %480, 0x3FE51EB860000000 %486 = fmul float %484, 0x3FE51EB860000000 br label %ENDIF136 ELSE138: ; preds = %ENDIF129 %487 = fmul float %27, %483 %488 = fmul float %480, 0x3FD51EB860000000 %489 = fmul float %487, 0x3FD51EB860000000 br label %ENDIF136 ENDIF136: ; preds = %ELSE138, %IF137 %temp4.0 = phi float [ %485, %IF137 ], [ %488, %ELSE138 ] %temp6.0 = phi float [ %486, %IF137 ], [ %489, %ELSE138 ] %490 = call float @fabs(float %465) %491 = call float @fabs(float %465) %492 = fsub float -0.000000e+00, %490 %493 = fcmp oge float %492, %491 %494 = sext i1 %493 to i32 %495 = bitcast i32 %494 to float %496 = bitcast float %495 to i32 %497 = icmp ne i32 %496, 0 br i1 %497, label %IF144, label %ENDIF143 IF144: ; preds = %ENDIF136 %498 = fptosi float %60 to i32 %499 = bitcast i32 %498 to float %500 = bitcast float %499 to i32 %501 = shl i32 %500, 4 %502 = add i32 %501, 16 %503 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %502) %504 = fmul float %27, %503 %505 = shl i32 %500, 4 %506 = add i32 %505, 16 %507 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %506) %508 = fmul float %28, %507 %509 = fmul float %504, 0x3FE51EB860000000 %510 = fmul float %508, 0x3FE51EB860000000 br label %ENDIF143 ENDIF143: ; preds = %ENDIF136, %IF144 %temp4.1 = phi float [ %509, %IF144 ], [ %temp4.0, %ENDIF136 ] %temp6.1 = phi float [ %510, %IF144 ], [ %temp6.0, %ENDIF136 ] %511 = fmul float %21, 1.000000e+00 %512 = fadd float %511, 0.000000e+00 %513 = fmul float %21, 0.000000e+00 %514 = fadd float %513, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp8.1, float %temp9.1, float %33, float %34) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %temp12.1, float %temp13.1, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp16.1, float %temp17.1, float %41, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %temp20.1, float %temp21.1, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %temp24.1, float %temp25.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp26.1, float %temp27.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %temp32.1, float %temp33.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %temp4.1, float %temp6.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %62, float %512, float %514) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x10 ; C0820910 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[28:31], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011C00 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_I32_F32_e32 v1, v28 ; 7E02111C V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v2, 16, v1 ; 4A040290 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v23, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001702 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[8:11], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010800 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v24, v8, v23 ; 10302F08 V_ADD_I32_e32 v1, 20, v1 ; 4A020294 BUFFER_LOAD_DWORD v22, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001601 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, -1.000000e+00, v22 ; 06022CF3 V_MOV_B32_e32 v2, 0x7fffffff ; 7E0402FF 7FFFFFFF V_AND_B32_e32 v2, v1, v2 ; 36040501 V_OR_B32_e32 v1, 0x80000000, v1 ; 380202FF 80000000 V_CMP_GE_F32_e32 vcc, v1, v2 ; 7C0C0501 V_CNDMASK_B32_e64 v1, 0, -1, vcc, 0, 0, 0, 0 ; D2000001 01A98280 V_CMP_EQ_I32_e64 s[4:5], v1, 0, 0, 0 ; D1040004 00010101 S_LOAD_DWORDX4 s[12:15], s[8:9], 0x14 ; C0860914 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[16:19], s[12:15][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80031000 S_LOAD_DWORDX4 s[12:15], s[8:9], 0xc ; C086090C S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[4:7], s[12:15][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80030400 S_LOAD_DWORDX4 s[12:15], s[8:9], 0x8 ; C0860908 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[12:15], s[12:15][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80030C00 S_LOAD_DWORDX4 s[8:11], s[8:9], 0x0 ; C0840900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[0:3], s[8:11][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80020000 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v20, v9, v23 ; 10282F09 V_MUL_F32_e32 v20, 3.300000e-01, v20 ; 102828FF 3EA8F5C3 V_MUL_F32_e32 v21, 3.300000e-01, v24 ; 102A30FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v20, v10, v23 ; 10282F0A V_MUL_F32_e32 v20, 6.600000e-01, v20 ; 102828FF 3F28F5C3 V_MUL_F32_e32 v21, 6.600000e-01, v24 ; 102A30FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v22, -2.000000e+00, v22 ; 062C2CF5 V_MOV_B32_e32 v23, 0x7fffffff ; 7E2E02FF 7FFFFFFF V_AND_B32_e32 v23, v22, v23 ; 362E2F16 V_OR_B32_e32 v22, 0x80000000, v22 ; 382C2CFF 80000000 V_CMP_GE_F32_e32 vcc, v22, v23 ; 7C0C2F16 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v20, v28 ; 7E28111C V_LSHLREV_B32_e32 v20, 4, v20 ; 34282884 V_ADD_I32_e32 v20, 16, v20 ; 4A282890 BUFFER_LOAD_DWORD v21, s[0:3] + v20 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001514 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v20, v10, v21 ; 10282B0A V_MUL_F32_e32 v20, 6.600000e-01, v20 ; 102828FF 3F28F5C3 V_MUL_F32_e32 v21, v9, v21 ; 102A2B09 V_MUL_F32_e32 v21, 6.600000e-01, v21 ; 102A2AFF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v22, v29 ; 7E2C111D V_LSHLREV_B32_e32 v22, 4, v22 ; 342C2C84 V_ADD_I32_e32 v23, 20, v22 ; 4A2E2C94 BUFFER_LOAD_DWORD v24, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001817 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v23, -1.000000e+00, v24 ; 062E30F3 V_MOV_B32_e32 v25, 0x7fffffff ; 7E3202FF 7FFFFFFF V_AND_B32_e32 v25, v23, v25 ; 36323317 V_OR_B32_e32 v23, 0x80000000, v23 ; 382E2EFF 80000000 V_CMP_GE_F32_e32 vcc, v23, v25 ; 7C0C3317 V_CNDMASK_B32_e64 v23, 0, -1, vcc, 0, 0, 0, 0 ; D2000017 01A98280 V_CMP_EQ_I32_e64 s[4:5], v23, 0, 0, 0 ; D1040004 00010117 V_ADD_I32_e32 v22, 16, v22 ; 4A2C2C90 BUFFER_LOAD_DWORD v26, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001A16 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v25, v8, v26 ; 10323508 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v22, v9, v26 ; 102C3509 V_MUL_F32_e32 v22, 3.300000e-01, v22 ; 102C2CFF 3EA8F5C3 V_MUL_F32_e32 v23, 3.300000e-01, v25 ; 102E32FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v22, v10, v26 ; 102C350A V_MUL_F32_e32 v22, 6.600000e-01, v22 ; 102C2CFF 3F28F5C3 V_MUL_F32_e32 v23, 6.600000e-01, v25 ; 102E32FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v24, -2.000000e+00, v24 ; 063030F5 V_MOV_B32_e32 v25, 0x7fffffff ; 7E3202FF 7FFFFFFF V_AND_B32_e32 v25, v24, v25 ; 36323318 V_OR_B32_e32 v24, 0x80000000, v24 ; 383030FF 80000000 V_CMP_GE_F32_e32 vcc, v24, v25 ; 7C0C3318 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v22, v29 ; 7E2C111D V_LSHLREV_B32_e32 v22, 4, v22 ; 342C2C84 V_ADD_I32_e32 v22, 16, v22 ; 4A2C2C90 BUFFER_LOAD_DWORD v23, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001716 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v22, v10, v23 ; 102C2F0A V_MUL_F32_e32 v22, 6.600000e-01, v22 ; 102C2CFF 3F28F5C3 V_MUL_F32_e32 v23, v9, v23 ; 102E2F09 V_MUL_F32_e32 v23, 6.600000e-01, v23 ; 102E2EFF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v24, v30 ; 7E30111E V_LSHLREV_B32_e32 v24, 4, v24 ; 34303084 V_ADD_I32_e32 v25, 20, v24 ; 4A323094 BUFFER_LOAD_DWORD v26, s[0:3] + v25 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001A19 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v25, -1.000000e+00, v26 ; 063234F3 V_MOV_B32_e32 v27, 0x7fffffff ; 7E3602FF 7FFFFFFF V_AND_B32_e32 v27, v25, v27 ; 36363719 V_OR_B32_e32 v25, 0x80000000, v25 ; 383232FF 80000000 V_CMP_GE_F32_e32 vcc, v25, v27 ; 7C0C3719 V_CNDMASK_B32_e64 v25, 0, -1, vcc, 0, 0, 0, 0 ; D2000019 01A98280 V_CMP_EQ_I32_e64 s[4:5], v25, 0, 0, 0 ; D1040004 00010119 V_ADD_I32_e32 v24, 16, v24 ; 4A303090 BUFFER_LOAD_DWORD v32, s[0:3] + v24 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002018 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v27, v8, v32 ; 10364108 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v24, v9, v32 ; 10304109 V_MUL_F32_e32 v24, 3.300000e-01, v24 ; 103030FF 3EA8F5C3 V_MUL_F32_e32 v25, 3.300000e-01, v27 ; 103236FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v24, v10, v32 ; 1030410A V_MUL_F32_e32 v24, 6.600000e-01, v24 ; 103030FF 3F28F5C3 V_MUL_F32_e32 v25, 6.600000e-01, v27 ; 103236FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v26, -2.000000e+00, v26 ; 063434F5 V_MOV_B32_e32 v27, 0x7fffffff ; 7E3602FF 7FFFFFFF V_AND_B32_e32 v27, v26, v27 ; 3636371A V_OR_B32_e32 v26, 0x80000000, v26 ; 383434FF 80000000 V_CMP_GE_F32_e32 vcc, v26, v27 ; 7C0C371A S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v24, v30 ; 7E30111E V_LSHLREV_B32_e32 v24, 4, v24 ; 34303084 V_ADD_I32_e32 v24, 16, v24 ; 4A303090 BUFFER_LOAD_DWORD v25, s[0:3] + v24 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001918 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v24, v10, v25 ; 1030330A V_MUL_F32_e32 v24, 6.600000e-01, v24 ; 103030FF 3F28F5C3 V_MUL_F32_e32 v25, v9, v25 ; 10323309 V_MUL_F32_e32 v25, 6.600000e-01, v25 ; 103232FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v26, v31 ; 7E34111F V_LSHLREV_B32_e32 v26, 4, v26 ; 34343484 V_ADD_I32_e32 v27, 20, v26 ; 4A363494 BUFFER_LOAD_DWORD v32, s[0:3] + v27 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 8000201B S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v27, -1.000000e+00, v32 ; 063640F3 V_MOV_B32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF V_AND_B32_e32 v33, v27, v33 ; 3642431B V_OR_B32_e32 v27, 0x80000000, v27 ; 383636FF 80000000 V_CMP_GE_F32_e32 vcc, v27, v33 ; 7C0C431B V_CNDMASK_B32_e64 v27, 0, -1, vcc, 0, 0, 0, 0 ; D200001B 01A98280 V_CMP_EQ_I32_e64 s[4:5], v27, 0, 0, 0 ; D1040004 0001011B V_ADD_I32_e32 v26, 16, v26 ; 4A343490 BUFFER_LOAD_DWORD v34, s[0:3] + v26 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 8000221A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v33, v8, v34 ; 10424508 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v26, v9, v34 ; 10344509 V_MUL_F32_e32 v26, 3.300000e-01, v26 ; 103434FF 3EA8F5C3 V_MUL_F32_e32 v27, 3.300000e-01, v33 ; 103642FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v26, v10, v34 ; 1034450A V_MUL_F32_e32 v26, 6.600000e-01, v26 ; 103434FF 3F28F5C3 V_MUL_F32_e32 v27, 6.600000e-01, v33 ; 103642FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v32, -2.000000e+00, v32 ; 064040F5 V_MOV_B32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF V_AND_B32_e32 v33, v32, v33 ; 36424320 V_OR_B32_e32 v32, 0x80000000, v32 ; 384040FF 80000000 V_CMP_GE_F32_e32 vcc, v32, v33 ; 7C0C4320 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v26, v31 ; 7E34111F V_LSHLREV_B32_e32 v26, 4, v26 ; 34343484 V_ADD_I32_e32 v26, 16, v26 ; 4A343490 BUFFER_LOAD_DWORD v27, s[0:3] + v26 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001B1A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v26, v10, v27 ; 1034370A V_MUL_F32_e32 v26, 6.600000e-01, v26 ; 103434FF 3F28F5C3 V_MUL_F32_e32 v27, v9, v27 ; 10363709 V_MUL_F32_e32 v27, 6.600000e-01, v27 ; 103636FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v28, v16 ; 7E381110 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v29, 20, v28 ; 4A3A3894 BUFFER_LOAD_DWORD v30, s[0:3] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001E1D S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v29, -1.000000e+00, v30 ; 063A3CF3 V_MOV_B32_e32 v31, 0x7fffffff ; 7E3E02FF 7FFFFFFF V_AND_B32_e32 v31, v29, v31 ; 363E3F1D V_OR_B32_e32 v29, 0x80000000, v29 ; 383A3AFF 80000000 V_CMP_GE_F32_e32 vcc, v29, v31 ; 7C0C3F1D V_CNDMASK_B32_e64 v29, 0, -1, vcc, 0, 0, 0, 0 ; D200001D 01A98280 V_CMP_EQ_I32_e64 s[4:5], v29, 0, 0, 0 ; D1040004 0001011D V_ADD_I32_e32 v28, 16, v28 ; 4A383890 BUFFER_LOAD_DWORD v32, s[0:3] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 8000201C S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v31, v8, v32 ; 103E4108 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v28, v9, v32 ; 10384109 V_MUL_F32_e32 v28, 3.300000e-01, v28 ; 103838FF 3EA8F5C3 V_MUL_F32_e32 v29, 3.300000e-01, v31 ; 103A3EFF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v28, v10, v32 ; 1038410A V_MUL_F32_e32 v28, 6.600000e-01, v28 ; 103838FF 3F28F5C3 V_MUL_F32_e32 v29, 6.600000e-01, v31 ; 103A3EFF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v30, -2.000000e+00, v30 ; 063C3CF5 V_MOV_B32_e32 v31, 0x7fffffff ; 7E3E02FF 7FFFFFFF V_AND_B32_e32 v31, v30, v31 ; 363E3F1E V_OR_B32_e32 v30, 0x80000000, v30 ; 383C3CFF 80000000 V_CMP_GE_F32_e32 vcc, v30, v31 ; 7C0C3F1E S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v28, v16 ; 7E381110 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v28, 16, v28 ; 4A383890 BUFFER_LOAD_DWORD v29, s[0:3] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001D1C S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v28, v10, v29 ; 10383B0A V_MUL_F32_e32 v28, 6.600000e-01, v28 ; 103838FF 3F28F5C3 V_MUL_F32_e32 v29, v9, v29 ; 103A3B09 V_MUL_F32_e32 v29, 6.600000e-01, v29 ; 103A3AFF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v30, v17 ; 7E3C1111 V_LSHLREV_B32_e32 v30, 4, v30 ; 343C3C84 V_ADD_I32_e32 v31, 20, v30 ; 4A3E3C94 BUFFER_LOAD_DWORD v32, s[0:3] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 8000201F S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v31, -1.000000e+00, v32 ; 063E40F3 V_MOV_B32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF V_AND_B32_e32 v33, v31, v33 ; 3642431F V_OR_B32_e32 v31, 0x80000000, v31 ; 383E3EFF 80000000 V_CMP_GE_F32_e32 vcc, v31, v33 ; 7C0C431F V_CNDMASK_B32_e64 v31, 0, -1, vcc, 0, 0, 0, 0 ; D200001F 01A98280 V_CMP_EQ_I32_e64 s[4:5], v31, 0, 0, 0 ; D1040004 0001011F V_ADD_I32_e32 v30, 16, v30 ; 4A3C3C90 BUFFER_LOAD_DWORD v34, s[0:3] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 8000221E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v33, v8, v34 ; 10424508 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v30, v9, v34 ; 103C4509 V_MUL_F32_e32 v30, 3.300000e-01, v30 ; 103C3CFF 3EA8F5C3 V_MUL_F32_e32 v31, 3.300000e-01, v33 ; 103E42FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v30, v10, v34 ; 103C450A V_MUL_F32_e32 v30, 6.600000e-01, v30 ; 103C3CFF 3F28F5C3 V_MUL_F32_e32 v31, 6.600000e-01, v33 ; 103E42FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v32, -2.000000e+00, v32 ; 064040F5 V_MOV_B32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF V_AND_B32_e32 v33, v32, v33 ; 36424320 V_OR_B32_e32 v32, 0x80000000, v32 ; 384040FF 80000000 V_CMP_GE_F32_e32 vcc, v32, v33 ; 7C0C4320 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v30, v17 ; 7E3C1111 V_LSHLREV_B32_e32 v30, 4, v30 ; 343C3C84 V_ADD_I32_e32 v30, 16, v30 ; 4A3C3C90 BUFFER_LOAD_DWORD v31, s[0:3] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001F1E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v30, v10, v31 ; 103C3F0A V_MUL_F32_e32 v30, 6.600000e-01, v30 ; 103C3CFF 3F28F5C3 V_MUL_F32_e32 v31, v9, v31 ; 103E3F09 V_MUL_F32_e32 v31, 6.600000e-01, v31 ; 103E3EFF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v32, v18 ; 7E401112 V_LSHLREV_B32_e32 v32, 4, v32 ; 34404084 V_ADD_I32_e32 v33, 20, v32 ; 4A424094 BUFFER_LOAD_DWORD v34, s[0:3] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002221 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v33, -1.000000e+00, v34 ; 064244F3 V_MOV_B32_e32 v35, 0x7fffffff ; 7E4602FF 7FFFFFFF V_AND_B32_e32 v35, v33, v35 ; 36464721 V_OR_B32_e32 v33, 0x80000000, v33 ; 384242FF 80000000 V_CMP_GE_F32_e32 vcc, v33, v35 ; 7C0C4721 V_CNDMASK_B32_e64 v33, 0, -1, vcc, 0, 0, 0, 0 ; D2000021 01A98280 V_CMP_EQ_I32_e64 s[4:5], v33, 0, 0, 0 ; D1040004 00010121 V_ADD_I32_e32 v32, 16, v32 ; 4A404090 BUFFER_LOAD_DWORD v36, s[0:3] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002420 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v35, v8, v36 ; 10464908 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v32, v9, v36 ; 10404909 V_MUL_F32_e32 v32, 3.300000e-01, v32 ; 104040FF 3EA8F5C3 V_MUL_F32_e32 v33, 3.300000e-01, v35 ; 104246FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v32, v10, v36 ; 1040490A V_MUL_F32_e32 v32, 6.600000e-01, v32 ; 104040FF 3F28F5C3 V_MUL_F32_e32 v33, 6.600000e-01, v35 ; 104246FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_ADD_F32_e32 v34, -2.000000e+00, v34 ; 064444F5 V_MOV_B32_e32 v35, 0x7fffffff ; 7E4602FF 7FFFFFFF V_AND_B32_e32 v35, v34, v35 ; 36464722 V_OR_B32_e32 v34, 0x80000000, v34 ; 384444FF 80000000 V_CMP_GE_F32_e32 vcc, v34, v35 ; 7C0C4722 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v32, v18 ; 7E401112 V_LSHLREV_B32_e32 v32, 4, v32 ; 34404084 V_ADD_I32_e32 v32, 16, v32 ; 4A404090 BUFFER_LOAD_DWORD v33, s[0:3] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002120 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v32, v10, v33 ; 1040430A V_MUL_F32_e32 v32, 6.600000e-01, v32 ; 104040FF 3F28F5C3 V_MUL_F32_e32 v33, v9, v33 ; 10424309 V_MUL_F32_e32 v33, 6.600000e-01, v33 ; 104242FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_CVT_I32_F32_e32 v34, v19 ; 7E441113 V_LSHLREV_B32_e32 v34, 4, v34 ; 34444484 V_ADD_I32_e32 v35, 20, v34 ; 4A464494 BUFFER_LOAD_DWORD v38, s[0:3] + v35 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002623 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v35, -1.000000e+00, v38 ; 06464CF3 V_MOV_B32_e32 v36, 0x7fffffff ; 7E4802FF 7FFFFFFF V_AND_B32_e32 v36, v35, v36 ; 36484923 V_OR_B32_e32 v35, 0x80000000, v35 ; 384646FF 80000000 V_CMP_GE_F32_e32 vcc, v35, v36 ; 7C0C4923 V_CNDMASK_B32_e64 v35, 0, -1, vcc, 0, 0, 0, 0 ; D2000023 01A98280 V_CMP_EQ_I32_e64 s[4:5], v35, 0, 0, 0 ; D1040004 00010123 V_ADD_I32_e32 v34, 16, v34 ; 4A444490 BUFFER_LOAD_DWORD v37, s[0:3] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002522 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v36, v8, v37 ; 10484B08 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_MUL_F32_e32 v34, v9, v37 ; 10444B09 V_MUL_F32_e32 v34, 3.300000e-01, v34 ; 104444FF 3EA8F5C3 V_MUL_F32_e32 v35, 3.300000e-01, v36 ; 104648FF 3EA8F5C3 S_OR_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842504 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x1 ; C2030101 S_BUFFER_LOAD_DWORD s7, s[0:3], 0x0 ; C2038100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_XOR_B64 exec, exec, s[4:5] ; 89FE047E V_MUL_F32_e32 v34, v10, v37 ; 10444B0A V_MUL_F32_e32 v34, 6.600000e-01, v34 ; 104444FF 3F28F5C3 V_MUL_F32_e32 v35, 6.600000e-01, v36 ; 104648FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E V_MOV_B32_e32 v37, s6 ; 7E4A0206 V_MOV_B32_e32 v36, s7 ; 7E480207 V_ADD_F32_e32 v38, -2.000000e+00, v38 ; 064C4CF5 V_MOV_B32_e32 v39, 0x7fffffff ; 7E4E02FF 7FFFFFFF V_AND_B32_e32 v39, v38, v39 ; 364E4F26 V_OR_B32_e32 v38, 0x80000000, v38 ; 384C4CFF 80000000 V_CMP_GE_F32_e32 vcc, v38, v39 ; 7C0C4F26 S_AND_SAVEEXEC_B64 s[4:5], vcc ; BE84246A S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E V_CVT_I32_F32_e32 v16, v19 ; 7E201113 V_LSHLREV_B32_e32 v16, 4, v16 ; 34202084 V_ADD_I32_e32 v16, 16, v16 ; 4A202090 BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v17, v10, v16 ; 1022210A V_MUL_F32_e32 v34, 6.600000e-01, v17 ; 104422FF 3F28F5C3 V_MUL_F32_e32 v8, v9, v16 ; 10102109 V_MUL_F32_e32 v35, 6.600000e-01, v8 ; 104610FF 3F28F5C3 S_OR_B64 exec, exec, s[4:5] ; 88FE047E EXP 15, 32, 0, 0, 0, v21, v20, v12, v13 ; F800020F 0D0C1415 EXP 15, 33, 0, 0, 0, v23, v22, v14, v15 ; F800021F 0F0E1617 EXP 15, 34, 0, 0, 0, v25, v24, v4, v5 ; F800022F 05041819 EXP 15, 35, 0, 0, 0, v27, v26, v6, v7 ; F800023F 07061A1B S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v4, 0.000000e+00 ; 7E080280 EXP 15, 36, 0, 0, 0, v29, v28, v4, v4 ; F800024F 04041C1D EXP 15, 37, 0, 0, 0, v31, v30, v4, v4 ; F800025F 04041E1F EXP 15, 38, 0, 0, 0, v33, v32, v4, v4 ; F800026F 04042021 EXP 15, 39, 0, 0, 0, v35, v34, v4, v4 ; F800027F 04042223 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_F32_e32 v4, v37, v1 ; 06080325 V_ADD_F32_e32 v5, v36, v0 ; 060A0124 V_MOV_B32_e32 v6, 1.000000e+00 ; 7E0C02F2 EXP 15, 12, 0, 1, 0, v5, v4, v2, v6 ; F80008CF 06020405 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL IN[7], GENERIC[16], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SAMP[12] DCL SAMP[13] DCL SAMP[14] DCL SAMP[15] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], TEMP[0] 3: MOV TEMP[1].y, TEMP[0].wwww 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: MUL TEMP[2], TEMP[2], TEMP[2] 7: MOV TEMP[1].x, TEMP[2].wwww 8: MOV TEMP[3].xy, IN[2].xyyy 9: TEX TEMP[3], TEMP[3], SAMP[2], 2D 10: MUL TEMP[3], TEMP[3], TEMP[3] 11: MOV TEMP[1].z, TEMP[3].wwww 12: MOV TEMP[4].xy, IN[3].xyyy 13: TEX TEMP[4], TEMP[4], SAMP[3], 2D 14: MUL TEMP[4], TEMP[4], TEMP[4] 15: MOV TEMP[1].w, TEMP[4].wwww 16: MUL TEMP[1], TEMP[1], TEMP[1] 17: MUL TEMP[1], TEMP[1], TEMP[1] 18: MOV TEMP[5].xy, IN[0].zwzz 19: MOV TEMP[5].zw, IN[1].wwzw 20: MUL TEMP[1], TEMP[1], TEMP[5] 21: MUL TEMP[0], TEMP[0], TEMP[1].yyyy 22: MAD TEMP[0], TEMP[1].xxxx, TEMP[2], TEMP[0] 23: MAD TEMP[0], TEMP[1].zzzz, TEMP[3], TEMP[0] 24: MAD TEMP[0], TEMP[1].wwww, TEMP[4], TEMP[0] 25: MOV TEMP[6].xy, IN[4].xyyy 26: TEX TEMP[6], TEMP[6], SAMP[4], 2D 27: MUL TEMP[2], TEMP[6], TEMP[6] 28: MOV TEMP[3].x, TEMP[2].wwww 29: MOV TEMP[6].xy, IN[5].xyyy 30: TEX TEMP[6], TEMP[6], SAMP[5], 2D 31: MUL TEMP[4], TEMP[6], TEMP[6] 32: MOV TEMP[3].y, TEMP[4].wwww 33: MOV TEMP[6].xy, IN[6].xyyy 34: TEX TEMP[6], TEMP[6], SAMP[6], 2D 35: MUL TEMP[5], TEMP[6], TEMP[6] 36: MOV TEMP[3].z, TEMP[5].wwww 37: MOV TEMP[6].xy, IN[7].xyyy 38: TEX TEMP[6], TEMP[6], SAMP[7], 2D 39: MUL TEMP[6], TEMP[6], TEMP[6] 40: MOV TEMP[3].w, TEMP[6].wwww 41: MUL TEMP[3], TEMP[3], TEMP[3] 42: MUL TEMP[3], TEMP[3], TEMP[3] 43: MOV TEMP[7].xy, IN[2].zwzz 44: MOV TEMP[7].zw, IN[3].wwzw 45: MUL TEMP[3], TEMP[3], TEMP[7] 46: MAD TEMP[0], TEMP[3].xxxx, TEMP[2], TEMP[0] 47: MAD TEMP[0], TEMP[3].yyyy, TEMP[4], TEMP[0] 48: MAD TEMP[0], TEMP[3].zzzz, TEMP[5], TEMP[0] 49: MAD TEMP[0], TEMP[3].wwww, TEMP[6], TEMP[0] 50: DP4 TEMP[4].x, TEMP[3], IMM[0].xxxx 51: DP4 TEMP[5].x, TEMP[1], IMM[0].xxxx 52: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 53: RCP TEMP[2].x, TEMP[4].xxxx 54: MUL TEMP[0], TEMP[0], TEMP[2].xxxx 55: MOV TEMP[4].xy, IN[1].xyyy 56: TEX TEMP[4].x, TEMP[4], SAMP[9], 2D 57: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].yyyy 58: MOV TEMP[5].xy, IN[0].xyyy 59: TEX TEMP[5].x, TEMP[5], SAMP[8], 2D 60: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[5].xxxx, TEMP[4].xxxx 61: MOV TEMP[5].xy, IN[2].xyyy 62: TEX TEMP[5].x, TEMP[5], SAMP[10], 2D 63: MAD TEMP[4].x, TEMP[1].zzzz, TEMP[5].xxxx, TEMP[4].xxxx 64: MOV TEMP[5].xy, IN[3].xyyy 65: TEX TEMP[5].x, TEMP[5], SAMP[11], 2D 66: MAD TEMP[1].x, TEMP[1].wwww, TEMP[5].xxxx, TEMP[4].xxxx 67: MOV TEMP[4].xy, IN[4].xyyy 68: TEX TEMP[4].x, TEMP[4], SAMP[12], 2D 69: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx 70: MOV TEMP[4].xy, IN[5].xyyy 71: TEX TEMP[4].x, TEMP[4], SAMP[13], 2D 72: MAD TEMP[1].x, TEMP[3].yyyy, TEMP[4].xxxx, TEMP[1].xxxx 73: MOV TEMP[4].xy, IN[6].xyyy 74: TEX TEMP[4].x, TEMP[4], SAMP[14], 2D 75: MAD TEMP[1].x, TEMP[3].zzzz, TEMP[4].xxxx, TEMP[1].xxxx 76: MOV TEMP[4].xy, IN[7].xyyy 77: TEX TEMP[4].x, TEMP[4], SAMP[15], 2D 78: MAD TEMP[1].x, TEMP[3].wwww, TEMP[4].xxxx, TEMP[1].xxxx 79: MUL TEMP[1], TEMP[2].xxxx, TEMP[1].xxxx 80: MOV OUT[1], TEMP[1] 81: MOV OUT[0], TEMP[0] 82: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 12 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 12 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 13 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 13 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14 %79 = load <8 x i32> addrspace(2)* %78, !tbaa !0 %80 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 14 %81 = load <4 x i32> addrspace(2)* %80, !tbaa !0 %82 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15 %83 = load <8 x i32> addrspace(2)* %82, !tbaa !0 %84 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 15 %85 = load <4 x i32> addrspace(2)* %84, !tbaa !0 %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %110 = bitcast float %90 to i32 %111 = bitcast float %91 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = bitcast <8 x i32> %27 to <32 x i8> %115 = bitcast <4 x i32> %29 to <16 x i8> %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %114, <16 x i8> %115, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = extractelement <4 x float> %116, i32 3 %121 = fmul float %117, %117 %122 = fmul float %118, %118 %123 = fmul float %119, %119 %124 = fmul float %120, %120 %125 = bitcast float %86 to i32 %126 = bitcast float %87 to i32 %127 = insertelement <2 x i32> undef, i32 %125, i32 0 %128 = insertelement <2 x i32> %127, i32 %126, i32 1 %129 = bitcast <8 x i32> %23 to <32 x i8> %130 = bitcast <4 x i32> %25 to <16 x i8> %131 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = extractelement <4 x float> %131, i32 3 %136 = fmul float %132, %132 %137 = fmul float %133, %133 %138 = fmul float %134, %134 %139 = fmul float %135, %135 %140 = bitcast float %94 to i32 %141 = bitcast float %95 to i32 %142 = insertelement <2 x i32> undef, i32 %140, i32 0 %143 = insertelement <2 x i32> %142, i32 %141, i32 1 %144 = bitcast <8 x i32> %31 to <32 x i8> %145 = bitcast <4 x i32> %33 to <16 x i8> %146 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %144, <16 x i8> %145, i32 2) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %147, %147 %152 = fmul float %148, %148 %153 = fmul float %149, %149 %154 = fmul float %150, %150 %155 = bitcast float %98 to i32 %156 = bitcast float %99 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %35 to <32 x i8> %160 = bitcast <4 x i32> %37 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, %162 %167 = fmul float %163, %163 %168 = fmul float %164, %164 %169 = fmul float %165, %165 %170 = fmul float %139, %139 %171 = fmul float %124, %124 %172 = fmul float %154, %154 %173 = fmul float %169, %169 %174 = fmul float %170, %170 %175 = fmul float %171, %171 %176 = fmul float %172, %172 %177 = fmul float %173, %173 %178 = fmul float %174, %88 %179 = fmul float %175, %89 %180 = fmul float %176, %92 %181 = fmul float %177, %93 %182 = fmul float %121, %179 %183 = fmul float %122, %179 %184 = fmul float %123, %179 %185 = fmul float %124, %179 %186 = fmul float %178, %136 %187 = fadd float %186, %182 %188 = fmul float %178, %137 %189 = fadd float %188, %183 %190 = fmul float %178, %138 %191 = fadd float %190, %184 %192 = fmul float %178, %139 %193 = fadd float %192, %185 %194 = fmul float %180, %151 %195 = fadd float %194, %187 %196 = fmul float %180, %152 %197 = fadd float %196, %189 %198 = fmul float %180, %153 %199 = fadd float %198, %191 %200 = fmul float %180, %154 %201 = fadd float %200, %193 %202 = fmul float %181, %166 %203 = fadd float %202, %195 %204 = fmul float %181, %167 %205 = fadd float %204, %197 %206 = fmul float %181, %168 %207 = fadd float %206, %199 %208 = fmul float %181, %169 %209 = fadd float %208, %201 %210 = bitcast float %102 to i32 %211 = bitcast float %103 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = bitcast <8 x i32> %39 to <32 x i8> %215 = bitcast <4 x i32> %41 to <16 x i8> %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %214, <16 x i8> %215, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = extractelement <4 x float> %216, i32 3 %221 = fmul float %217, %217 %222 = fmul float %218, %218 %223 = fmul float %219, %219 %224 = fmul float %220, %220 %225 = bitcast float %104 to i32 %226 = bitcast float %105 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = bitcast <8 x i32> %43 to <32 x i8> %230 = bitcast <4 x i32> %45 to <16 x i8> %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = extractelement <4 x float> %231, i32 3 %236 = fmul float %232, %232 %237 = fmul float %233, %233 %238 = fmul float %234, %234 %239 = fmul float %235, %235 %240 = bitcast float %106 to i32 %241 = bitcast float %107 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = bitcast <8 x i32> %47 to <32 x i8> %245 = bitcast <4 x i32> %49 to <16 x i8> %246 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %244, <16 x i8> %245, i32 2) %247 = extractelement <4 x float> %246, i32 0 %248 = extractelement <4 x float> %246, i32 1 %249 = extractelement <4 x float> %246, i32 2 %250 = extractelement <4 x float> %246, i32 3 %251 = fmul float %247, %247 %252 = fmul float %248, %248 %253 = fmul float %249, %249 %254 = fmul float %250, %250 %255 = bitcast float %108 to i32 %256 = bitcast float %109 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = bitcast <8 x i32> %51 to <32 x i8> %260 = bitcast <4 x i32> %53 to <16 x i8> %261 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %259, <16 x i8> %260, i32 2) %262 = extractelement <4 x float> %261, i32 0 %263 = extractelement <4 x float> %261, i32 1 %264 = extractelement <4 x float> %261, i32 2 %265 = extractelement <4 x float> %261, i32 3 %266 = fmul float %262, %262 %267 = fmul float %263, %263 %268 = fmul float %264, %264 %269 = fmul float %265, %265 %270 = fmul float %224, %224 %271 = fmul float %239, %239 %272 = fmul float %254, %254 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %273, %273 %278 = fmul float %274, %96 %279 = fmul float %275, %97 %280 = fmul float %276, %100 %281 = fmul float %277, %101 %282 = fmul float %278, %221 %283 = fadd float %282, %203 %284 = fmul float %278, %222 %285 = fadd float %284, %205 %286 = fmul float %278, %223 %287 = fadd float %286, %207 %288 = fmul float %278, %224 %289 = fadd float %288, %209 %290 = fmul float %279, %236 %291 = fadd float %290, %283 %292 = fmul float %279, %237 %293 = fadd float %292, %285 %294 = fmul float %279, %238 %295 = fadd float %294, %287 %296 = fmul float %279, %239 %297 = fadd float %296, %289 %298 = fmul float %280, %251 %299 = fadd float %298, %291 %300 = fmul float %280, %252 %301 = fadd float %300, %293 %302 = fmul float %280, %253 %303 = fadd float %302, %295 %304 = fmul float %280, %254 %305 = fadd float %304, %297 %306 = fmul float %281, %266 %307 = fadd float %306, %299 %308 = fmul float %281, %267 %309 = fadd float %308, %301 %310 = fmul float %281, %268 %311 = fadd float %310, %303 %312 = fmul float %281, %269 %313 = fadd float %312, %305 %314 = fmul float %278, 1.000000e+00 %315 = fmul float %279, 1.000000e+00 %316 = fadd float %314, %315 %317 = fmul float %280, 1.000000e+00 %318 = fadd float %316, %317 %319 = fmul float %281, 1.000000e+00 %320 = fadd float %318, %319 %321 = fmul float %178, 1.000000e+00 %322 = fmul float %179, 1.000000e+00 %323 = fadd float %321, %322 %324 = fmul float %180, 1.000000e+00 %325 = fadd float %323, %324 %326 = fmul float %181, 1.000000e+00 %327 = fadd float %325, %326 %328 = fadd float %320, %327 %329 = fdiv float 1.000000e+00, %328 %330 = fmul float %307, %329 %331 = fmul float %309, %329 %332 = fmul float %311, %329 %333 = fmul float %313, %329 %334 = bitcast float %90 to i32 %335 = bitcast float %91 to i32 %336 = insertelement <2 x i32> undef, i32 %334, i32 0 %337 = insertelement <2 x i32> %336, i32 %335, i32 1 %338 = bitcast <8 x i32> %59 to <32 x i8> %339 = bitcast <4 x i32> %61 to <16 x i8> %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %337, <32 x i8> %338, <16 x i8> %339, i32 2) %341 = extractelement <4 x float> %340, i32 0 %342 = fmul float %341, %179 %343 = bitcast float %86 to i32 %344 = bitcast float %87 to i32 %345 = insertelement <2 x i32> undef, i32 %343, i32 0 %346 = insertelement <2 x i32> %345, i32 %344, i32 1 %347 = bitcast <8 x i32> %55 to <32 x i8> %348 = bitcast <4 x i32> %57 to <16 x i8> %349 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %346, <32 x i8> %347, <16 x i8> %348, i32 2) %350 = extractelement <4 x float> %349, i32 0 %351 = fmul float %178, %350 %352 = fadd float %351, %342 %353 = bitcast float %94 to i32 %354 = bitcast float %95 to i32 %355 = insertelement <2 x i32> undef, i32 %353, i32 0 %356 = insertelement <2 x i32> %355, i32 %354, i32 1 %357 = bitcast <8 x i32> %63 to <32 x i8> %358 = bitcast <4 x i32> %65 to <16 x i8> %359 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %356, <32 x i8> %357, <16 x i8> %358, i32 2) %360 = extractelement <4 x float> %359, i32 0 %361 = fmul float %180, %360 %362 = fadd float %361, %352 %363 = bitcast float %98 to i32 %364 = bitcast float %99 to i32 %365 = insertelement <2 x i32> undef, i32 %363, i32 0 %366 = insertelement <2 x i32> %365, i32 %364, i32 1 %367 = bitcast <8 x i32> %67 to <32 x i8> %368 = bitcast <4 x i32> %69 to <16 x i8> %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %366, <32 x i8> %367, <16 x i8> %368, i32 2) %370 = extractelement <4 x float> %369, i32 0 %371 = fmul float %181, %370 %372 = fadd float %371, %362 %373 = bitcast float %102 to i32 %374 = bitcast float %103 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %71 to <32 x i8> %378 = bitcast <4 x i32> %73 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = fmul float %278, %380 %382 = fadd float %381, %372 %383 = bitcast float %104 to i32 %384 = bitcast float %105 to i32 %385 = insertelement <2 x i32> undef, i32 %383, i32 0 %386 = insertelement <2 x i32> %385, i32 %384, i32 1 %387 = bitcast <8 x i32> %75 to <32 x i8> %388 = bitcast <4 x i32> %77 to <16 x i8> %389 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %386, <32 x i8> %387, <16 x i8> %388, i32 2) %390 = extractelement <4 x float> %389, i32 0 %391 = fmul float %279, %390 %392 = fadd float %391, %382 %393 = bitcast float %106 to i32 %394 = bitcast float %107 to i32 %395 = insertelement <2 x i32> undef, i32 %393, i32 0 %396 = insertelement <2 x i32> %395, i32 %394, i32 1 %397 = bitcast <8 x i32> %79 to <32 x i8> %398 = bitcast <4 x i32> %81 to <16 x i8> %399 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = fmul float %280, %400 %402 = fadd float %401, %392 %403 = bitcast float %108 to i32 %404 = bitcast float %109 to i32 %405 = insertelement <2 x i32> undef, i32 %403, i32 0 %406 = insertelement <2 x i32> %405, i32 %404, i32 1 %407 = bitcast <8 x i32> %83 to <32 x i8> %408 = bitcast <4 x i32> %85 to <16 x i8> %409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %406, <32 x i8> %407, <16 x i8> %408, i32 2) %410 = extractelement <4 x float> %409, i32 0 %411 = fmul float %281, %410 %412 = fadd float %411, %402 %413 = fmul float %329, %412 %414 = fmul float %329, %412 %415 = fmul float %329, %412 %416 = fmul float %329, %412 %417 = call i32 @llvm.SI.packf16(float %330, float %331) %418 = bitcast i32 %417 to float %419 = call i32 @llvm.SI.packf16(float %332, float %333) %420 = bitcast i32 %419 to float %421 = call i32 @llvm.SI.packf16(float %413, float %414) %422 = bitcast i32 %421 to float %423 = call i32 @llvm.SI.packf16(float %415, float %416) %424 = bitcast i32 %423 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %418, float %420, float %418, float %420) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %422, float %424, float %422, float %424) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 1, [m0] ; C80C0500 V_INTERP_P2_F32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v8, v7, v7 ; 10100F07 V_MUL_F32_e32 v9, v8, v8 ; 10121108 V_MUL_F32_e32 v9, v9, v9 ; 10121309 V_INTERP_P1_F32 v10, v0, 3, 0, [m0] ; C8280300 V_INTERP_P2_F32 v10, [v10], v1, 3, 0, [m0] ; C8290301 V_MUL_F32_e32 v9, v9, v10 ; 10121509 V_INTERP_P1_F32 v11, v0, 1, 0, [m0] ; C82C0100 V_INTERP_P2_F32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 V_INTERP_P1_F32 v10, v0, 0, 0, [m0] ; C8280000 V_INTERP_P2_F32 v10, [v10], v1, 0, 0, [m0] ; C8290001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[12:19], s[0:3] ; F0800F00 00030C0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v16, v15, v15 ; 10201F0F V_MUL_F32_e32 v17, v16, v16 ; 10222110 V_MUL_F32_e32 v17, v17, v17 ; 10222311 V_INTERP_P1_F32 v18, v0, 2, 0, [m0] ; C8480200 V_INTERP_P2_F32 v18, [v18], v1, 2, 0, [m0] ; C8490201 V_MAD_F32 v19, v17, v18, v9, 0, 0 ; D2820013 04262511 V_INTERP_P1_F32 v21, v0, 1, 2, [m0] ; C8540900 V_INTERP_P2_F32 v21, [v21], v1, 1, 2, [m0] ; C8550901 V_INTERP_P1_F32 v20, v0, 0, 2, [m0] ; C8500800 V_INTERP_P2_F32 v20, [v20], v1, 0, 2, [m0] ; C8510801 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x10 ; C0C60710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[12:19], s[0:3] ; F0800F00 00031614 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v26, v25, v25 ; 10343319 V_MUL_F32_e32 v27, v26, v26 ; 1036351A V_MUL_F32_e32 v27, v27, v27 ; 1036371B V_INTERP_P1_F32 v28, v0, 2, 1, [m0] ; C8700600 V_INTERP_P2_F32 v28, [v28], v1, 2, 1, [m0] ; C8710601 V_MAD_F32 v19, v27, v28, v19, 0, 0 ; D2820013 044E391B V_INTERP_P1_F32 v30, v0, 1, 3, [m0] ; C8780D00 V_INTERP_P2_F32 v30, [v30], v1, 1, 3, [m0] ; C8790D01 V_INTERP_P1_F32 v29, v0, 0, 3, [m0] ; C8740C00 V_INTERP_P2_F32 v29, [v29], v1, 0, 3, [m0] ; C8750C01 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[12:19], s[6:7], 0x18 ; C0C60718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[12:19], s[0:3] ; F0800F00 00031F1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v35, v34, v34 ; 10464522 V_MUL_F32_e32 v36, v35, v35 ; 10484723 V_MUL_F32_e32 v36, v36, v36 ; 10484924 V_INTERP_P1_F32 v37, v0, 3, 1, [m0] ; C8940700 V_INTERP_P2_F32 v37, [v37], v1, 3, 1, [m0] ; C8950701 V_MAD_F32 v19, v36, v37, v19, 0, 0 ; D2820013 044E4B24 V_INTERP_P1_F32 v39, v0, 1, 5, [m0] ; C89C1500 V_INTERP_P2_F32 v39, [v39], v1, 1, 5, [m0] ; C89D1501 V_INTERP_P1_F32 v38, v0, 0, 5, [m0] ; C8981400 V_INTERP_P2_F32 v38, [v38], v1, 0, 5, [m0] ; C8991401 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x28 ; C0C60728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[12:19], s[0:3] ; F0800F00 00032826 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v44, v43, v43 ; 1058572B V_MUL_F32_e32 v45, v44, v44 ; 105A592C V_MUL_F32_e32 v45, v45, v45 ; 105A5B2D V_INTERP_P1_F32 v46, v0, 3, 2, [m0] ; C8B80B00 V_INTERP_P2_F32 v46, [v46], v1, 3, 2, [m0] ; C8B90B01 V_MUL_F32_e32 v45, v45, v46 ; 105A5D2D V_INTERP_P1_F32 v47, v0, 1, 4, [m0] ; C8BC1100 V_INTERP_P2_F32 v47, [v47], v1, 1, 4, [m0] ; C8BD1101 V_INTERP_P1_F32 v46, v0, 0, 4, [m0] ; C8B81000 V_INTERP_P2_F32 v46, [v46], v1, 0, 4, [m0] ; C8B91001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x20 ; C0C60720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[12:19], s[0:3] ; F0800F00 0003302E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v52, v51, v51 ; 10686733 V_MUL_F32_e32 v53, v52, v52 ; 106A6934 V_MUL_F32_e32 v53, v53, v53 ; 106A6B35 V_INTERP_P1_F32 v54, v0, 2, 2, [m0] ; C8D80A00 V_INTERP_P2_F32 v54, [v54], v1, 2, 2, [m0] ; C8D90A01 V_MAD_F32 v55, v53, v54, v45, 0, 0 ; D2820037 04B66D35 V_INTERP_P1_F32 v57, v0, 1, 6, [m0] ; C8E41900 V_INTERP_P2_F32 v57, [v57], v1, 1, 6, [m0] ; C8E51901 V_INTERP_P1_F32 v56, v0, 0, 6, [m0] ; C8E01800 V_INTERP_P2_F32 v56, [v56], v1, 0, 6, [m0] ; C8E11801 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x30 ; C0C60730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[12:19], s[0:3] ; F0800F00 00033A38 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v62, v61, v61 ; 107C7B3D V_MUL_F32_e32 v63, v62, v62 ; 107E7D3E V_MUL_F32_e32 v63, v63, v63 ; 107E7F3F V_INTERP_P1_F32 v64, v0, 2, 3, [m0] ; C9000E00 V_INTERP_P2_F32 v64, [v64], v1, 2, 3, [m0] ; C9010E01 V_MAD_F32 v55, v63, v64, v55, 0, 0 ; D2820037 04DE813F V_INTERP_P1_F32 v66, v0, 1, 7, [m0] ; C9081D00 V_INTERP_P2_F32 v66, [v66], v1, 1, 7, [m0] ; C9091D01 V_INTERP_P1_F32 v65, v0, 0, 7, [m0] ; C9041C00 V_INTERP_P2_F32 v65, [v65], v1, 0, 7, [m0] ; C9051C01 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[12:19], s[6:7], 0x38 ; C0C60738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[67:70], 15, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[12:19], s[0:3] ; F0800F00 00034341 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v71, v70, v70 ; 108E8D46 V_MUL_F32_e32 v72, v71, v71 ; 10908F47 V_MUL_F32_e32 v72, v72, v72 ; 10909148 V_INTERP_P1_F32 v73, v0, 3, 3, [m0] ; C9240F00 V_INTERP_P2_F32 v73, [v73], v1, 3, 3, [m0] ; C9250F01 V_MAD_F32 v0, v72, v73, v55, 0, 0 ; D2820000 04DE9348 V_ADD_F32_e32 v0, v0, v19 ; 06002700 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v1, v8, v9 ; 10021308 V_MUL_F32_e32 v8, v17, v18 ; 10102511 V_MAD_F32 v1, v8, v16, v1, 0, 0 ; D2820001 04062108 V_MUL_F32_e32 v16, v27, v28 ; 1020391B V_MAD_F32 v1, v16, v26, v1, 0, 0 ; D2820001 04063510 V_MUL_F32_e32 v17, v36, v37 ; 10224B24 V_MAD_F32 v1, v17, v35, v1, 0, 0 ; D2820001 04064711 V_MUL_F32_e32 v18, v53, v54 ; 10246D35 V_MAD_F32 v1, v18, v52, v1, 0, 0 ; D2820001 04066912 V_MAD_F32 v1, v45, v44, v1, 0, 0 ; D2820001 0406592D V_MUL_F32_e32 v19, v63, v64 ; 1026813F V_MAD_F32 v1, v19, v62, v1, 0, 0 ; D2820001 04067D13 V_MUL_F32_e32 v26, v72, v73 ; 10349348 V_MAD_F32 v1, v26, v71, v1, 0, 0 ; D2820001 04068F1A V_MUL_F32_e32 v1, v1, v0 ; 10020101 V_MUL_F32_e32 v27, v6, v6 ; 10360D06 V_MUL_F32_e32 v27, v27, v9 ; 1036131B V_MUL_F32_e32 v28, v14, v14 ; 10381D0E V_MAD_F32 v27, v8, v28, v27, 0, 0 ; D282001B 046E3908 V_MUL_F32_e32 v28, v24, v24 ; 10383118 V_MAD_F32 v27, v16, v28, v27, 0, 0 ; D282001B 046E3910 V_MUL_F32_e32 v28, v33, v33 ; 10384321 V_MAD_F32 v27, v17, v28, v27, 0, 0 ; D282001B 046E3911 V_MUL_F32_e32 v28, v50, v50 ; 10386532 V_MAD_F32 v27, v18, v28, v27, 0, 0 ; D282001B 046E3912 V_MUL_F32_e32 v28, v42, v42 ; 1038552A V_MAD_F32 v27, v45, v28, v27, 0, 0 ; D282001B 046E392D V_MUL_F32_e32 v28, v60, v60 ; 1038793C V_MAD_F32 v27, v19, v28, v27, 0, 0 ; D282001B 046E3913 V_MUL_F32_e32 v28, v69, v69 ; 10388B45 V_MAD_F32 v27, v26, v28, v27, 0, 0 ; D282001B 046E391A V_MUL_F32_e32 v27, v27, v0 ; 1036011B V_CVT_PKRTZ_F16_F32_e32 v1, v27, v1 ; 5E02031B V_MUL_F32_e32 v27, v5, v5 ; 10360B05 V_MUL_F32_e32 v27, v27, v9 ; 1036131B V_MUL_F32_e32 v28, v13, v13 ; 10381B0D V_MAD_F32 v27, v8, v28, v27, 0, 0 ; D282001B 046E3908 V_MUL_F32_e32 v28, v23, v23 ; 10382F17 V_MAD_F32 v27, v16, v28, v27, 0, 0 ; D282001B 046E3910 V_MUL_F32_e32 v28, v32, v32 ; 10384120 V_MAD_F32 v27, v17, v28, v27, 0, 0 ; D282001B 046E3911 V_MUL_F32_e32 v28, v49, v49 ; 10386331 V_MAD_F32 v27, v18, v28, v27, 0, 0 ; D282001B 046E3912 V_MUL_F32_e32 v28, v41, v41 ; 10385329 V_MAD_F32 v27, v45, v28, v27, 0, 0 ; D282001B 046E392D V_MUL_F32_e32 v28, v59, v59 ; 1038773B V_MAD_F32 v27, v19, v28, v27, 0, 0 ; D282001B 046E3913 V_MUL_F32_e32 v28, v68, v68 ; 10388944 V_MAD_F32 v27, v26, v28, v27, 0, 0 ; D282001B 046E391A V_MUL_F32_e32 v27, v27, v0 ; 1036011B V_MUL_F32_e32 v4, v4, v4 ; 10080904 V_MUL_F32_e32 v4, v4, v9 ; 10081304 V_MUL_F32_e32 v5, v12, v12 ; 100A190C V_MAD_F32 v4, v8, v5, v4, 0, 0 ; D2820004 04120B08 V_MUL_F32_e32 v5, v22, v22 ; 100A2D16 V_MAD_F32 v4, v16, v5, v4, 0, 0 ; D2820004 04120B10 V_MUL_F32_e32 v5, v31, v31 ; 100A3F1F V_MAD_F32 v4, v17, v5, v4, 0, 0 ; D2820004 04120B11 V_MUL_F32_e32 v5, v48, v48 ; 100A6130 V_MAD_F32 v4, v18, v5, v4, 0, 0 ; D2820004 04120B12 V_MUL_F32_e32 v5, v40, v40 ; 100A5128 V_MAD_F32 v4, v45, v5, v4, 0, 0 ; D2820004 04120B2D V_MUL_F32_e32 v5, v58, v58 ; 100A753A V_MAD_F32 v4, v19, v5, v4, 0, 0 ; D2820004 04120B13 V_MUL_F32_e32 v5, v67, v67 ; 100A8743 V_MAD_F32 v4, v26, v5, v4, 0, 0 ; D2820004 04120B1A V_MUL_F32_e32 v4, v4, v0 ; 10080104 V_CVT_PKRTZ_F16_F32_e32 v4, v4, v27 ; 5E083704 EXP 15, 0, 1, 0, 0, v4, v1, v4, v1 ; F800040F 01040104 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x24 ; C0800524 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x48 ; C0C40748 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F IMAGE_SAMPLE v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800100 00020102 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v1, v9 ; 10021301 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x20 ; C0800520 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x40 ; C0C40740 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800100 0002020A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v8, v2, v1, 0, 0 ; D2820001 04060508 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x28 ; C0800528 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x50 ; C0C40750 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[8:15], s[0:3] ; F0800100 00020214 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v16, v2, v1, 0, 0 ; D2820001 04060510 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x2c ; C080052C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x58 ; C0C40758 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[8:15], s[0:3] ; F0800100 0002021D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v17, v2, v1, 0, 0 ; D2820001 04060511 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x30 ; C0800530 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x60 ; C0C40760 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[8:15], s[0:3] ; F0800100 0002022E S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v18, v2, v1, 0, 0 ; D2820001 04060512 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x34 ; C0800534 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x68 ; C0C40768 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[8:15], s[0:3] ; F0800100 00020226 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v45, v2, v1, 0, 0 ; D2820001 0406052D S_LOAD_DWORDX4 s[0:3], s[4:5], 0x38 ; C0800538 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x70 ; C0C40770 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[8:15], s[0:3] ; F0800100 00020238 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v19, v2, v1, 0, 0 ; D2820001 04060513 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x3c ; C080053C S_LOAD_DWORDX8 s[4:11], s[6:7], 0x78 ; C0C20778 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[4:11], s[0:3] ; F0800100 00010241 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v26, v2, v1, 0, 0 ; D2820001 0406051A V_MUL_F32_e32 v0, v0, v1 ; 10000300 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v0 ; 5E000100 EXP 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV OUT[1], IN[1].xyxy 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v1, v2 ; F800020F 02010201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.0000} IMM[1] FLT32 { 1.0000, -1.0000, -2.0000, 0.5000} IMM[2] FLT32 { 2.0000, -2.0000, 0.0000, 4.0000} IMM[3] FLT32 { 0.0000, 0.5000, 2.0000, 0.0000} 0: MOV TEMP[0].xy, IMM[0].xyxx 1: MAD TEMP[1].zw, CONST[0].xyxz, IMM[0].xyyx, IN[0].xyxy 2: MAD TEMP[1].xy, TEMP[1].zwzw, IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 5: MOV TEMP[2].z, TEMP[2].xxxx 6: ADD TEMP[3].zw, -CONST[0].xyxy, IN[0].xyxy 7: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 10: MOV TEMP[2].x, TEMP[3].xxxx 11: MAD TEMP[3].zw, CONST[0].xyxy, IMM[0].xyxy, IN[0].xyxy 12: MOV TEMP[0].w, TEMP[3].wwzw 13: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 14: MOV TEMP[3].xy, TEMP[1].xyyy 15: TEX TEMP[3].xzw, TEMP[3], SAMP[0], 2D 16: MOV TEMP[4].zw, TEMP[3].wwzw 17: MOV TEMP[2].y, TEMP[3].xxxx 18: DP3 TEMP[3].x, TEMP[2].xyzz, IMM[0].xyww 19: MOV TEMP[0].z, TEMP[3].xxxx 20: ADD TEMP[5].xy, CONST[0].xzzw, IN[0] 21: MOV TEMP[4].xy, TEMP[5].xyxx 22: MAD TEMP[1].xy, TEMP[4], IMM[0].xyxx, IMM[0].zxzz 23: MOV TEMP[5].xy, TEMP[1].xyyy 24: TEX TEMP[5].xw, TEMP[5], SAMP[0], 2D 25: MOV TEMP[4].x, TEMP[5].xxxw 26: MAD TEMP[5].xy, CONST[0], TEMP[0].yxzw, IN[0] 27: MOV TEMP[5].xy, TEMP[5].xyxx 28: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 29: MOV TEMP[6].xy, TEMP[1].xyyy 30: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 31: MOV TEMP[5].zw, TEMP[6].wwzw 32: MOV TEMP[4].y, TEMP[6].xxxx 33: ADD TEMP[6].xy, CONST[0], IN[0] 34: MOV TEMP[5].xy, TEMP[6].xyxx 35: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 36: MOV TEMP[6].xy, TEMP[1].xyyy 37: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 38: MOV TEMP[5].zw, TEMP[6].wwzw 39: MOV TEMP[4].z, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[4].yzxx, IMM[1].xyzz 41: ADD TEMP[3].z, TEMP[6].xxxx, TEMP[3].xxxx 42: MOV TEMP[0].z, TEMP[3].zzzz 43: MUL TEMP[6].w, IMM[1].wwww, CONST[1].xxxx 44: MOV TEMP[0].w, TEMP[6].wwww 45: MUL TEMP[3].x, TEMP[6].wwww, TEMP[3].zzzz 46: MOV TEMP[5].x, TEMP[3].xxxx 47: MAD TEMP[3].xy, CONST[0].zyzw, TEMP[0], IN[0] 48: MOV TEMP[0].xy, TEMP[3].xyxx 49: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 50: MOV TEMP[3].xy, TEMP[1].xyyy 51: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 52: MOV TEMP[2].w, TEMP[3].xxxx 53: DP3 TEMP[3].x, TEMP[2].xyww, IMM[0].xxww 54: ADD TEMP[7].yz, CONST[0].xzyw, IN[0].xxyw 55: MOV TEMP[0].yz, TEMP[7].zyzz 56: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 57: MOV TEMP[1].xy, TEMP[1].xyyy 58: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 59: MOV TEMP[2].w, TEMP[1].wwww 60: MOV TEMP[4].w, TEMP[1].xxxx 61: DP3 TEMP[1].x, TEMP[4].yzww, IMM[1].yyzz 62: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 63: MUL TEMP[1].y, TEMP[6].wwww, TEMP[1].xxxx 64: MOV TEMP[5].y, TEMP[1].yyyy 65: MUL TEMP[1].xy, TEMP[5], IMM[2].xyxy 66: MOV TEMP[0].xy, TEMP[1].xyxx 67: MOV TEMP[0].z, IMM[0].zzzz 68: ADD TEMP[1].xyz, -TEMP[0], IMM[2].zzwx 69: MOV TEMP[0].xy, TEMP[1].xyzx 70: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 71: MOV TEMP[0].z, TEMP[1].xxxx 72: MAX TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 73: RSQ TEMP[1].x, TEMP[1].xxxx 74: MOV TEMP[2].z, TEMP[1].xxxx 75: MUL TEMP[0].xy, TEMP[0], TEMP[1].xxxx 76: MOV TEMP[2].xy, TEMP[0].xyxx 77: MAD TEMP[0].xyz, TEMP[2], IMM[3].yyzz, IMM[1].wwww 78: MOV TEMP[0].xyz, TEMP[0].xyzx 79: MOV TEMP[0].w, IMM[0].zzzz 80: MOV OUT[0], TEMP[0] 81: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fmul float %24, -1.000000e+00 %35 = fadd float %34, %32 %36 = fmul float %26, 1.000000e+00 %37 = fadd float %36, %33 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %37, -1.000000e+00 %41 = fadd float %40, 1.000000e+00 %42 = bitcast float %39 to i32 %43 = bitcast float %41 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = bitcast <8 x i32> %29 to <32 x i8> %47 = bitcast <4 x i32> %31 to <16 x i8> %48 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %46, <16 x i8> %47, i32 2) %49 = extractelement <4 x float> %48, i32 0 %50 = fsub float -0.000000e+00, %24 %51 = fadd float %50, %32 %52 = fsub float -0.000000e+00, %25 %53 = fadd float %52, %33 %54 = fmul float %51, 1.000000e+00 %55 = fadd float %54, 0.000000e+00 %56 = fmul float %53, -1.000000e+00 %57 = fadd float %56, 1.000000e+00 %58 = bitcast float %55 to i32 %59 = bitcast float %57 to i32 %60 = insertelement <2 x i32> undef, i32 %58, i32 0 %61 = insertelement <2 x i32> %60, i32 %59, i32 1 %62 = bitcast <8 x i32> %29 to <32 x i8> %63 = bitcast <4 x i32> %31 to <16 x i8> %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %62, <16 x i8> %63, i32 2) %65 = extractelement <4 x float> %64, i32 0 %66 = fmul float %24, 1.000000e+00 %67 = fadd float %66, %32 %68 = fmul float %25, -1.000000e+00 %69 = fadd float %68, %33 %70 = fmul float %67, 1.000000e+00 %71 = fadd float %70, 0.000000e+00 %72 = fmul float %69, -1.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %29 to <32 x i8> %79 = bitcast <4 x i32> %31 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = fmul float %65, 1.000000e+00 %83 = fmul float %81, -1.000000e+00 %84 = fadd float %83, %82 %85 = fmul float %49, 2.000000e+00 %86 = fadd float %84, %85 %87 = fadd float %24, %32 %88 = fadd float %26, %33 %89 = fmul float %87, 1.000000e+00 %90 = fadd float %89, 0.000000e+00 %91 = fmul float %88, -1.000000e+00 %92 = fadd float %91, 1.000000e+00 %93 = bitcast float %90 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %29 to <32 x i8> %98 = bitcast <4 x i32> %31 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = fmul float %24, -1.000000e+00 %102 = fadd float %101, %32 %103 = fmul float %25, 1.000000e+00 %104 = fadd float %103, %33 %105 = fmul float %102, 1.000000e+00 %106 = fadd float %105, 0.000000e+00 %107 = fmul float %104, -1.000000e+00 %108 = fadd float %107, 1.000000e+00 %109 = bitcast float %106 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %29 to <32 x i8> %114 = bitcast <4 x i32> %31 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = fadd float %24, %32 %118 = fadd float %25, %33 %119 = fmul float %117, 1.000000e+00 %120 = fadd float %119, 0.000000e+00 %121 = fmul float %118, -1.000000e+00 %122 = fadd float %121, 1.000000e+00 %123 = bitcast float %120 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <8 x i32> %29 to <32 x i8> %128 = bitcast <4 x i32> %31 to <16 x i8> %129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 0 %131 = fmul float %116, 1.000000e+00 %132 = fmul float %130, -1.000000e+00 %133 = fadd float %132, %131 %134 = fmul float %100, -2.000000e+00 %135 = fadd float %133, %134 %136 = fadd float %135, %86 %137 = fmul float 5.000000e-01, %27 %138 = fmul float %137, %136 %139 = fmul float %26, 1.000000e+00 %140 = fadd float %139, %32 %141 = fmul float %25, -1.000000e+00 %142 = fadd float %141, %33 %143 = fmul float %140, 1.000000e+00 %144 = fadd float %143, 0.000000e+00 %145 = fmul float %142, -1.000000e+00 %146 = fadd float %145, 1.000000e+00 %147 = bitcast float %144 to i32 %148 = bitcast float %146 to i32 %149 = insertelement <2 x i32> undef, i32 %147, i32 0 %150 = insertelement <2 x i32> %149, i32 %148, i32 1 %151 = bitcast <8 x i32> %29 to <32 x i8> %152 = bitcast <4 x i32> %31 to <16 x i8> %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %150, <32 x i8> %151, <16 x i8> %152, i32 2) %154 = extractelement <4 x float> %153, i32 0 %155 = fmul float %65, 1.000000e+00 %156 = fmul float %81, 1.000000e+00 %157 = fadd float %156, %155 %158 = fmul float %154, 2.000000e+00 %159 = fadd float %157, %158 %160 = fadd float %26, %32 %161 = fadd float %25, %33 %162 = fmul float %160, 1.000000e+00 %163 = fadd float %162, 0.000000e+00 %164 = fmul float %161, -1.000000e+00 %165 = fadd float %164, 1.000000e+00 %166 = bitcast float %163 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %29 to <32 x i8> %171 = bitcast <4 x i32> %31 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = fmul float %116, -1.000000e+00 %175 = fmul float %130, -1.000000e+00 %176 = fadd float %175, %174 %177 = fmul float %173, -2.000000e+00 %178 = fadd float %176, %177 %179 = fadd float %178, %159 %180 = fmul float %137, %179 %181 = fmul float %138, 2.000000e+00 %182 = fmul float %180, -2.000000e+00 %183 = fsub float -0.000000e+00, %181 %184 = fadd float %183, 0.000000e+00 %185 = fsub float -0.000000e+00, %182 %186 = fadd float %185, 0.000000e+00 %187 = fsub float -0.000000e+00, 0.000000e+00 %188 = fadd float %187, 4.000000e+00 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = fcmp uge float %193, 0x3E7AD7F2A0000000 %195 = select i1 %194, float %193, float 0x3E7AD7F2A0000000 %196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195) %197 = fmul float %184, %196 %198 = fmul float %186, %196 %199 = fmul float %197, 5.000000e-01 %200 = fadd float %199, 5.000000e-01 %201 = fmul float %198, 5.000000e-01 %202 = fadd float %201, 5.000000e-01 %203 = fmul float %196, 2.000000e+00 %204 = fadd float %203, 5.000000e-01 %205 = call i32 @llvm.SI.packf16(float %200, float %202) %206 = bitcast i32 %205 to float %207 = call i32 @llvm.SI.packf16(float %204, float 0.000000e+00) %208 = bitcast i32 %207 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %206, float %208, float %206, float %208) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[12:15], s[2:3], 0x0 ; C0860300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s16, s[12:15], 0x0 ; C2080D00 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s16, v2 ; 06060410 V_INTERP_P1_F32 v5, v0, 1, 0, [m0] ; C8140100 V_INTERP_P2_F32 v5, [v5], v1, 1, 0, [m0] ; C8150101 S_BUFFER_LOAD_DWORD s17, s[12:15], 0x1 ; C2088D01 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v0, s17, v5 ; 08000A11 V_ADD_F32_e32 v4, 1.000000e+00, v0 ; 060800F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800100 00010003 V_SUBREV_F32_e32 v6, s16, v2 ; 0A0C0410 V_MOV_B32_e32 v7, v4 ; 7E0E0304 IMAGE_SAMPLE v1, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800100 00010106 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v8, v1, v0 ; 08100101 S_BUFFER_LOAD_DWORD s16, s[12:15], 0x2 ; C2080D02 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v9, s16, v5 ; 06120A10 V_SUB_F32_e32 v9, 1.000000e+00, v9 ; 081212F2 V_MOV_B32_e32 v7, v9 ; 7E0E0309 IMAGE_SAMPLE v10, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800100 00010A06 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v10, v10, v10 ; 0614150A V_ADD_F32_e32 v8, v8, v10 ; 06101508 V_ADD_F32_e32 v5, s17, v5 ; 060A0A11 V_SUB_F32_e32 v7, 1.000000e+00, v5 ; 080E0AF2 V_MOV_B32_e32 v10, v3 ; 7E140303 V_MOV_B32_e32 v11, v4 ; 7E160304 V_MOV_B32_e32 v11, v7 ; 7E160307 IMAGE_SAMPLE v5, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[4:11], s[0:3] ; F0800100 0001050A IMAGE_SAMPLE v10, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800100 00010A06 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v11, v10, v5 ; 08160B0A V_MOV_B32_e32 v12, v3 ; 7E180303 V_MOV_B32_e32 v13, v4 ; 7E1A0304 V_MOV_B32_e32 v13, v9 ; 7E1A0309 IMAGE_SAMPLE v9, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[4:11], s[0:3] ; F0800100 0001090C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v9, v9, -2.000000e+00, v11, 0, 0 ; D2820009 042DEB09 V_ADD_F32_e32 v8, v9, v8 ; 06101109 S_BUFFER_LOAD_DWORD s12, s[12:15], 0x4 ; C2060D04 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e64 v9, s12, 5.000000e-01, 0, 0 ; D2100009 0001E00C V_MUL_F32_e32 v11, v9, v8 ; 10161109 V_MAD_F32 v8, v9, v8, v11, 0, 0 ; D2820008 042E1109 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_ADD_F32_e32 v3, s16, v2 ; 06060410 IMAGE_SAMPLE v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800100 00010103 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, v1, v1 ; 06020301 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_MOV_B32_e32 v1, 0x80000000 ; 7E0202FF 80000000 V_XOR_B32_e32 v1, v5, v1 ; 3A020305 V_SUB_F32_e32 v1, v1, v10 ; 08021501 V_MOV_B32_e32 v4, v7 ; 7E080307 IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800100 00010203 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v2, -2.000000e+00, v1, 0, 0 ; D2820001 0405EB02 V_ADD_F32_e32 v0, v1, v0 ; 06000101 V_MUL_F32_e32 v0, v9, v0 ; 10000109 V_MUL_F32_e32 v0, -2.000000e+00, v0 ; 100000F5 V_MUL_F32_e32 v1, v0, v0 ; 10020100 V_MAD_F32 v1, v8, v8, v1, 0, 0 ; D2820001 04061108 V_ADD_F32_e32 v1, 1.600000e+01, v1 ; 060202FF 41800000 V_MOV_B32_e32 v2, 1.000000e-07 ; 7E0402FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v1, v2 ; 7C0C0501 V_CMP_U_F32_e64 s[0:1], v1, v1, 0, 0 ; D0100000 00020301 V_CNDMASK_B32_e64 v2, 0, -1, vcc, 0, 0, 0, 0 ; D2000002 01A98280 V_CNDMASK_B32_e64 v3, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000003 00018280 V_OR_B32_e32 v2, v2, v3 ; 38040702 V_MOV_B32_e32 v3, 0x33d6bf95 ; 7E0602FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v2, 0, 0, 0 ; D10A0000 00010102 V_CNDMASK_B32_e64 v1, v3, v1, s[0:1], 0, 0, 0, 0 ; D2000001 00020303 V_RSQ_CLAMP_F32_e32 v1, v1 ; 7E025901 V_MUL_F32_e32 v0, v0, v1 ; 10000300 V_MUL_F32_e32 v0, 5.000000e-01, v0 ; 100000F0 V_SUB_F32_e32 v0, 5.000000e-01, v0 ; 080000F0 V_MUL_F32_e32 v2, v8, v1 ; 10040308 V_MUL_F32_e32 v2, 5.000000e-01, v2 ; 100404F0 V_SUB_F32_e32 v2, 5.000000e-01, v2 ; 080404F0 V_CVT_PKRTZ_F16_F32_e32 v0, v2, v0 ; 5E000102 V_ADD_F32_e32 v1, v1, v1 ; 06020301 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_CVT_PKRTZ_F16_F32_e64 v1, v1, 0.000000e+00, 0, 0 ; D25E0001 00010101 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].zw, IN[0].zzzz, IMM[0].xyxy, IMM[0].xyyx 3: MOV TEMP[0].zw, TEMP[1].wwzw 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV TEMP[1].zw, IMM[0].xxyx 6: MOV OUT[0], TEMP[0] 7: MOV OUT[1], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = fadd float %13, %19 %29 = fadd float %14, %20 %30 = fmul float %21, 1.000000e+00 %31 = fadd float %30, 0.000000e+00 %32 = fmul float %21, 0.000000e+00 %33 = fadd float %32, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %28, float %29, float %31, float %33) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v1 ; 06080204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s0, v0 ; 060C0000 EXP 15, 12, 0, 1, 0, v6, v4, v2, v5 ; F80008CF 05020406 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..4] DCL TEMP[0..21], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, 0.0000, 0.3145} IMM[1] FLT32 { 1.0000, -64.0000, 0.0104, 0.0000} IMM[2] INT32 {40, 0, -1, 0} 0: ABS TEMP[0], CONST[1] 1: ADD TEMP[1], -TEMP[0].yyyy, TEMP[0].xxxx 2: MOV TEMP[2].z, TEMP[1].zzzz 3: RCP TEMP[3].x, TEMP[0].yyyy 4: MUL TEMP[3], TEMP[3].xxxx, CONST[1].xxzy 5: MOV TEMP[2].yw, TEMP[3].wyww 6: RCP TEMP[2].x, TEMP[0].xxxx 7: MUL TEMP[0], TEMP[2].xxxx, CONST[1] 8: MOV TEMP[4], TEMP[2].ywzw 9: FSGE TEMP[3].x, TEMP[1].zzzz, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].x, TEMP[0].xxxx 12: ELSE :0 13: MOV TEMP[3].x, TEMP[2].yyyy 14: ENDIF 15: MOV TEMP[3].x, TEMP[3].xxxx 16: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 17: UIF TEMP[5].xxxx :0 18: MOV TEMP[5].x, TEMP[0].yyyy 19: ELSE :0 20: MOV TEMP[5].x, TEMP[2].wwww 21: ENDIF 22: MOV TEMP[3].y, TEMP[5].xxxx 23: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 24: UIF TEMP[5].xxxx :0 25: MOV TEMP[5].x, TEMP[0].zzzz 26: ELSE :0 27: MOV TEMP[5].x, TEMP[2].zzzz 28: ENDIF 29: MOV TEMP[3].z, TEMP[5].xxxx 30: FSGE TEMP[1].x, TEMP[1].zzzz, IMM[0].xxxx 31: UIF TEMP[1].xxxx :0 32: MOV TEMP[1].x, TEMP[0].wwww 33: ELSE :0 34: MOV TEMP[1].x, TEMP[2].wwww 35: ENDIF 36: MOV TEMP[3].w, TEMP[1].xxxx 37: MOV TEMP[2].xy, TEMP[3].xyxx 38: ADD TEMP[1], CONST[3].xyxy, IN[0].xyxy 39: MOV TEMP[2].zw, TEMP[1].wwzw 40: MUL TEMP[1], TEMP[2], CONST[2].xyxy 41: MOV TEMP[2].w, TEMP[1].wwzw 42: MOV TEMP[1].xy, TEMP[1].zwww 43: TEX TEMP[1], TEMP[1], SAMP[0], 2D 44: MUL TEMP[1], TEMP[1].wwww, CONST[0].zzzz 45: MAD TEMP[1], TEMP[1].zzzz, IMM[0].yyyy, CONST[4].zzzz 46: MOV TEMP[2].z, TEMP[1].zzzz 47: ADD TEMP[5], TEMP[2], IN[0] 48: MOV TEMP[0].xy, TEMP[5].xyxx 49: MUL TEMP[3], TEMP[3].xyxy, CONST[0].xyxy 50: MOV TEMP[0].zw, TEMP[3].wwzw 51: DP2 TEMP[3].x, TEMP[3].zwww, TEMP[3].zwww 52: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 53: RSQ TEMP[5].x, TEMP[3].xxxx 54: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 55: CMP TEMP[5].x, -TEMP[3].xxxx, TEMP[5].xxxx, IMM[0].xxxx 56: MOV TEMP[2].w, TEMP[5].xxxx 57: MAD TEMP[1], TEMP[5].xxxx, IMM[0].wwww, TEMP[1].zzzz 58: MOV TEMP[2].z, TEMP[1].zzzz 59: ADD TEMP[3], TEMP[0], CONST[3] 60: MOV TEMP[0].xy, TEMP[3].xyxx 61: MUL TEMP[3], TEMP[0], CONST[2] 62: MOV TEMP[0].xy, TEMP[3].xyxx 63: MOV TEMP[0].zw, TEMP[3].yyxy 64: MOV TEMP[3].x, IMM[1].xxxx 65: MOV TEMP[3].y, TEMP[1].zzzz 66: MOV TEMP[1].x, IMM[2].xxxx 67: BGNLOOP :0 68: ISGE TEMP[5].x, IMM[2].yyyy, TEMP[1].xxxx 69: UIF TEMP[5].xxxx :0 70: BRK 71: ENDIF 72: MAD TEMP[6], TEMP[2].xyxy, CONST[2].xyxy, TEMP[0] 73: MOV TEMP[0].zw, TEMP[6].wwzw 74: MAD TEMP[7], TEMP[2].wwww, IMM[0].wwww, TEMP[3].yyyy 75: MOV TEMP[3].y, TEMP[7].yyyy 76: MOV TEMP[8].xy, TEMP[6].zwww 77: TEX TEMP[9], TEMP[8], SAMP[0], 2D 78: MOV TEMP[10], TEMP[9] 79: MOV TEMP[11].yzw, TEMP[9].zyzw 80: MUL TEMP[12], TEMP[9].wwww, IMM[0].yyyy 81: MAD TEMP[13], TEMP[9].wwww, IMM[0].yyyy, IMM[1].yyyy 82: MUL TEMP[4], TEMP[13].wwww, IMM[1].zzzz 83: MOV_SAT TEMP[14], TEMP[4] 84: ADD TEMP[15], -TEMP[14].wwww, IMM[1].xxxx 85: MOV TEMP[3].w, TEMP[15].wwww 86: MAD TEMP[16], TEMP[12].zzzz, -CONST[0].zzzz, TEMP[7].yyyy 87: MOV TEMP[3].z, TEMP[16].zzzz 88: MIN TEMP[17], TEMP[15].wwww, TEMP[3].xxxx 89: MOV TEMP[11].x, TEMP[17].xxxx 90: MOV TEMP[18].x, TEMP[3].xxxx 91: MOV TEMP[19].x, TEMP[17].xxxx 92: FSGE TEMP[20].x, TEMP[16].zzzz, IMM[0].xxxx 93: UIF TEMP[20].xxxx :0 94: MOV TEMP[21].x, TEMP[3].xxxx 95: ELSE :0 96: MOV TEMP[21].x, TEMP[17].xxxx 97: ENDIF 98: MOV TEMP[3].x, TEMP[21].xxxx 99: UADD TEMP[1].x, TEMP[1].xxxx, IMM[2].zzzz 100: ENDLOOP :0 101: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[1].xxxx 102: MUL TEMP[0], TEMP[0].xxxx, CONST[4].zzzz 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %36 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %37 = load <8 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %39 = load <4 x i32> addrspace(2)* %38, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @fabs(float %27) %43 = call float @fabs(float %28) %44 = call float @fabs(float %29) %45 = call float @fabs(float %30) %46 = fsub float -0.000000e+00, %43 %47 = fadd float %46, %42 %48 = fdiv float 1.000000e+00, %43 %49 = fmul float %48, %27 %50 = fmul float %48, %28 %51 = fdiv float 1.000000e+00, %42 %52 = fmul float %51, %27 %53 = fmul float %51, %28 %54 = fcmp oge float %47, 0.000000e+00 %55 = sext i1 %54 to i32 %56 = bitcast i32 %55 to float %57 = bitcast float %56 to i32 %58 = icmp ne i32 %57, 0 %. = select i1 %58, float %52, float %49 %59 = fcmp oge float %47, 0.000000e+00 %60 = sext i1 %59 to i32 %61 = bitcast i32 %60 to float %62 = bitcast float %61 to i32 %63 = icmp ne i32 %62, 0 %temp20.0 = select i1 %63, float %53, float %50 %64 = fadd float %33, %40 %65 = fadd float %34, %41 %66 = fmul float %64, %31 %67 = fmul float %65, %32 %68 = bitcast float %66 to i32 %69 = bitcast float %67 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = bitcast <8 x i32> %37 to <32 x i8> %73 = bitcast <4 x i32> %39 to <16 x i8> %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %72, <16 x i8> %73, i32 2) %75 = extractelement <4 x float> %74, i32 3 %76 = fmul float %75, %26 %77 = fmul float %76, 2.550000e+02 %78 = fadd float %77, %35 %79 = fadd float %., %40 %80 = fadd float %temp20.0, %41 %81 = fmul float %., %24 %82 = fmul float %temp20.0, %25 %83 = fmul float %81, %81 %84 = fmul float %82, %82 %85 = fadd float %83, %84 %86 = fcmp uge float %85, 0x3E7AD7F2A0000000 %87 = select i1 %86, float %85, float 0x3E7AD7F2A0000000 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = fmul float %88, %87 %90 = fsub float -0.000000e+00, %87 %91 = call float @llvm.AMDGPU.cndlt(float %90, float %89, float 0.000000e+00) %92 = fmul float %91, 0x3FD42085C0000000 %93 = fadd float %92, %78 %94 = fadd float %79, %33 %95 = fadd float %80, %34 %96 = fmul float %94, %31 %97 = fmul float %95, %32 %98 = fmul float %., %31 %99 = fmul float %temp20.0, %32 %100 = fmul float %91, 0x3FD42085C0000000 %101 = bitcast <8 x i32> %37 to <32 x i8> %102 = bitcast <4 x i32> %39 to <16 x i8> %103 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %ENDIF97, %main_body %temp12.1 = phi float [ 1.000000e+00, %main_body ], [ %temp12.1., %ENDIF97 ] %temp13.0 = phi float [ %93, %main_body ], [ %122, %ENDIF97 ] %temp4.1 = phi float [ 0x36F4000000000000, %main_body ], [ %153, %ENDIF97 ] %temp3.0 = phi float [ %97, %main_body ], [ %121, %ENDIF97 ] %temp2.0 = phi float [ %96, %main_body ], [ %120, %ENDIF97 ] %104 = bitcast float %temp4.1 to i32 %105 = icmp sge i32 0, %104 %106 = sext i1 %105 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = icmp ne i32 %108, 0 br i1 %109, label %IF98, label %ENDIF97 IF98: ; preds = %LOOP %temp12.1.lcssa = phi float [ %temp12.1, %LOOP ] %110 = fsub float -0.000000e+00, %temp12.1.lcssa %111 = fadd float %110, 1.000000e+00 %112 = fmul float %111, %35 %113 = fmul float %111, %35 %114 = fmul float %111, %35 %115 = fmul float %111, %35 %116 = call i32 @llvm.SI.packf16(float %112, float %113) %117 = bitcast i32 %116 to float %118 = call i32 @llvm.SI.packf16(float %114, float %115) %119 = bitcast i32 %118 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %117, float %119, float %117, float %119) ret void ENDIF97: ; preds = %LOOP %120 = fadd float %98, %temp2.0 %121 = fadd float %99, %temp3.0 %122 = fadd float %100, %temp13.0 %123 = bitcast float %120 to i32 %124 = bitcast float %121 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %101, <16 x i8> %102, i32 2) %128 = extractelement <4 x float> %127, i32 3 %129 = fmul float %128, 2.550000e+02 %130 = fmul float %128, 2.550000e+02 %131 = fadd float %130, -6.400000e+01 %132 = fmul float %131, 0x3F855559C0000000 %133 = fmul float %131, 0x3F855559C0000000 %134 = fmul float %131, 0x3F855559C0000000 %135 = fmul float %131, 0x3F855559C0000000 %136 = call float @llvm.AMDIL.clamp.(float %132, float 0.000000e+00, float 1.000000e+00) %137 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00) %138 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00) %139 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00) %140 = fsub float -0.000000e+00, %139 %141 = fadd float %140, 1.000000e+00 %142 = fmul float %129, %103 %143 = fadd float %142, %122 %144 = fcmp uge float %141, %temp12.1 %145 = select i1 %144, float %temp12.1, float %141 %146 = fcmp oge float %143, 0.000000e+00 %147 = sext i1 %146 to i32 %148 = bitcast i32 %147 to float %149 = bitcast float %148 to i32 %150 = icmp ne i32 %149, 0 %temp12.1. = select i1 %150, float %temp12.1, float %145 %151 = bitcast float %temp4.1 to i32 %152 = add i32 %151, -1 %153 = bitcast i32 %152 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_LOAD_DWORDX4 s[20:23], s[2:3], 0x0 ; C08A0300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[20:23], 0x4 ; C2001504 V_MOV_B32_e32 v2, 0x7fffffff ; 7E0402FF 7FFFFFFF S_WAITCNT lgkmcnt(0) ; BF8C007F V_AND_B32_e32 v3, s0, v2 ; 36060400 V_RCP_F32_e32 v4, v3 ; 7E085503 V_MUL_F32_e32 v5, s0, v4 ; 100A0800 S_BUFFER_LOAD_DWORD s1, s[20:23], 0x5 ; C2009505 S_WAITCNT lgkmcnt(0) ; BF8C007F V_AND_B32_e32 v2, s1, v2 ; 36040401 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v6, s0, v2 ; 100C0400 V_SUB_F32_e64 v3, v3, |s1|, 0, 0 ; D2080203 00000303 V_CMP_GE_F32_e64 s[2:3], v3, 0.000000e+00, 0, 0 ; D00C0002 00010103 V_CNDMASK_B32_e64 v6, v6, v5, s[2:3], 0, 0, 0, 0 ; D2000006 000A0B06 S_BUFFER_LOAD_DWORD s0, s[20:23], 0x0 ; C2001500 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s0, v6 ; 10060C00 V_MUL_F32_e32 v4, s1, v4 ; 10080801 V_MUL_F32_e32 v2, s1, v2 ; 10040401 V_CNDMASK_B32_e64 v4, v2, v4, s[2:3], 0, 0, 0, 0 ; D2000004 000A0902 S_BUFFER_LOAD_DWORD s0, s[20:23], 0x1 ; C2001501 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s0, v4 ; 10040800 V_MUL_F32_e32 v2, v2, v2 ; 10040502 V_MAD_F32 v2, v3, v3, v2, 0, 0 ; D2820002 040A0703 V_MOV_B32_e32 v3, 1.000000e-07 ; 7E0602FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v3, 0, -1, vcc, 0, 0, 0, 0 ; D2000003 01A98280 V_CNDMASK_B32_e64 v5, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000005 00018280 V_OR_B32_e32 v3, v3, v5 ; 38060B03 V_MOV_B32_e32 v5, 0x33d6bf95 ; 7E0A02FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_CNDMASK_B32_e64 v2, v5, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020505 V_RSQ_CLAMP_F32_e32 v3, v2 ; 7E065902 V_MUL_F32_e32 v3, v3, v2 ; 10060503 V_MOV_B32_e32 v5, 0x80000000 ; 7E0A02FF 80000000 V_XOR_B32_e32 v2, v2, v5 ; 3A040B02 V_CMP_GT_F32_e32 vcc, 0, v2 ; 7C080480 V_CNDMASK_B32_e64 v2, 0.000000e+00, v3, vcc, 0, 0, 0, 0 ; D2000002 01AA0680 S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v5, v0, 1, 0, [m0] ; C8140100 V_INTERP_P2_F32 v5, [v5], v1, 1, 0, [m0] ; C8150101 S_BUFFER_LOAD_DWORD s0, s[20:23], 0xd ; C200150D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s0, v5 ; 06060A00 S_BUFFER_LOAD_DWORD s1, s[20:23], 0x9 ; C2009509 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s1, v3 ; 10100601 V_INTERP_P1_F32 v9, v0, 0, 0, [m0] ; C8240000 V_INTERP_P2_F32 v9, [v9], v1, 0, 0, [m0] ; C8250001 S_BUFFER_LOAD_DWORD s2, s[20:23], 0xc ; C201150C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s2, v9 ; 06001202 S_BUFFER_LOAD_DWORD s3, s[20:23], 0x8 ; C2019508 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s3, v0 ; 100E0003 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v0, 8, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[12:19], s[8:11] ; F0800800 00430007 S_BUFFER_LOAD_DWORD s4, s[20:23], 0x2 ; C2021502 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s5, s[20:23], 0x12 ; C2029512 V_MOV_B32_e32 v1, 2.550000e+02 ; 7E0202FF 437F0000 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, v1, s5, 0, 0 ; D2820000 00160300 V_MOV_B32_e32 v1, 3.144850e-01 ; 7E0202FF 3EA1042E V_MAD_F32 v0, v2, v1, v0, 0, 0 ; D2820000 04020302 V_MUL_F32_e32 v1, 3.144850e-01, v2 ; 100204FF 3EA1042E V_MUL_F32_e32 v2, s1, v4 ; 10040801 V_MUL_F32_e32 v3, s3, v6 ; 10060C03 V_ADD_F32_e32 v4, v4, v5 ; 06080B04 V_ADD_F32_e32 v4, s0, v4 ; 06080800 V_MUL_F32_e32 v5, s1, v4 ; 100A0801 V_ADD_F32_e32 v6, v6, v9 ; 060C1306 V_ADD_F32_e32 v6, s2, v6 ; 060C0C02 V_MUL_F32_e32 v4, s3, v6 ; 10080C03 V_MOV_B32_e32 v6, 0x80000000 ; 7E0C02FF 80000000 V_XOR_B32_e32 v6, s4, v6 ; 3A0C0C04 V_MOV_B32_e32 v8, 5.605194e-44 ; 7E1002A8 V_MOV_B32_e32 v10, 1.000000e+00 ; 7E1402F2 S_MOV_B64 s[2:3], 0 ; BE820480 V_MOV_B32_e32 v7, s5 ; 7E0E0205 V_MOV_B32_e32 v9, v10 ; 7E12030A V_CMP_LT_I32_e64 s[0:1], v8, 1, 0, 0 ; D1020000 00010308 V_CNDMASK_B32_e64 v10, 0, -1, s[0:1], 0, 0, 0, 0 ; D200000A 00018280 V_CMP_EQ_I32_e64 s[0:1], v10, 0, 0, 0 ; D1040000 0001010A S_AND_SAVEEXEC_B64 s[4:5], s[0:1] ; BE842400 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_ADD_F32_e32 v5, v2, v5 ; 060A0B02 V_ADD_F32_e32 v4, v3, v4 ; 06080903 IMAGE_SAMPLE v10, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800800 00430A04 V_MOV_B32_e32 v11, -6.400000e+01 ; 7E1602FF C2800000 V_MOV_B32_e32 v12, 2.550000e+02 ; 7E1802FF 437F0000 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MAD_F32 v11, v10, v12, v11, 0, 0 ; D282000B 042E190A V_MUL_F32_e32 v11, 1.041670e-02, v11 ; 101616FF 3C2AAACE V_ADD_F32_e64 v11, v11, 0, 1, 0 ; D206080B 0001010B V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_CMP_U_F32_e32 vcc, v11, v9 ; 7C10130B V_CMP_GE_F32_e64 s[0:1], v11, v9, 0, 0 ; D00C0000 0002130B V_CNDMASK_B32_e64 v12, 0, -1, s[0:1], 0, 0, 0, 0 ; D200000C 00018280 V_CNDMASK_B32_e64 v13, 0, -1, vcc, 0, 0, 0, 0 ; D200000D 01A98280 V_OR_B32_e32 v12, v12, v13 ; 38181B0C V_CMP_NE_I32_e64 s[0:1], v12, 0, 0, 0 ; D10A0000 0001010C V_CNDMASK_B32_e64 v11, v11, v9, s[0:1], 0, 0, 0, 0 ; D200000B 0002130B V_MUL_F32_e32 v10, 2.550000e+02, v10 ; 101414FF 437F0000 V_ADD_F32_e32 v0, v1, v0 ; 06000101 V_MAD_F32 v10, v10, v6, v0, 0, 0 ; D282000A 04020D0A V_CMP_GE_F32_e64 s[0:1], v10, 0.000000e+00, 0, 0 ; D00C0000 0001010A V_CNDMASK_B32_e64 v10, v11, v9, s[0:1], 0, 0, 0, 0 ; D200000A 0002130B V_ADD_I32_e32 v8, -1, v8 ; 4A1010C1 S_OR_B64 exec, exec, s[4:5] ; 88FE047E S_OR_B64 s[2:3], s[4:5], s[2:3] ; 88820204 S_ANDN2_B64 exec, exec, s[2:3] ; 8AFE027E S_CBRANCH_EXECNZ BB0_1 ; BF890000 S_OR_B64 exec, exec, s[2:3] ; 88FE027E V_SUB_F32_e32 v0, 1.000000e+00, v9 ; 080012F2 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v0 ; 5E000100 EXP 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.0000, -2.0000, 0.0000, 1.0000} IMM[1] FLT32 { -1.0000, 1.0000, 0.5000, 0.0000} 0: MAD TEMP[0], IN[1].xyxx, IMM[0].xyzz, IMM[1].xyzy 1: MOV TEMP[1].xyz, IN[0].xyzx 2: MUL TEMP[2], IMM[0].wwzz, IN[1].xyxx 3: MOV OUT[2], TEMP[2] 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %22, 2.000000e+00 %25 = fadd float %24, -1.000000e+00 %26 = fmul float %23, -2.000000e+00 %27 = fadd float %26, 1.000000e+00 %28 = fmul float %22, 0.000000e+00 %29 = fadd float %28, 5.000000e-01 %30 = fmul float %22, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = fmul float 1.000000e+00, %22 %33 = fmul float 1.000000e+00, %23 %34 = fmul float 0.000000e+00, %22 %35 = fmul float 0.000000e+00, %22 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %15, float %16, float %17, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %32, float %33, float %34, float %35) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v0, v1, v5, v5 ; F800021F 05050100 V_MAD_F32 v4, v1, -2.000000e+00, 1.000000e+00, 0, 0 ; D2820004 03C9EB01 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_F32_e32 v0, v0, v0 ; 06000100 V_ADD_F32_e32 v0, -1.000000e+00, v0 ; 060000F3 V_MOV_B32_e32 v1, 1.000000e+00 ; 7E0202F2 V_MOV_B32_e32 v2, 5.000000e-01 ; 7E0402F0 EXP 15, 12, 0, 1, 0, v0, v4, v2, v1 ; F80008CF 01020400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -0.0000, -1.0000} IMM[1] FLT32 { 0.0000, 2.0000, 2.2000, 1.0000} IMM[2] FLT32 { 1.7000, 0.0010, 0.4545, 0.0000} 0: MUL TEMP[0].xyz, IMM[0].xyxy, IN[0].yzxw 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[0].yzxw, IMM[0].xxyy, -TEMP[0].yzxw 3: MOV TEMP[2].xyz, TEMP[1].xyzx 4: MAD TEMP[3].xyz, IN[0].zxyw, IMM[0].zzww, TEMP[0] 5: MOV TEMP[0].xyz, TEMP[3].xyzx 6: DP2 TEMP[1].x, TEMP[1].xzzz, TEMP[1].xzzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2] 10: MOV TEMP[2].xyz, TEMP[1].xyzx 11: DP2 TEMP[1].x, TEMP[3].yzzz, TEMP[3].yzzz 12: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 13: RSQ TEMP[1].x, TEMP[1].xxxx 14: MOV TEMP[0].w, TEMP[1].xxxx 15: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 16: MOV TEMP[0].xyz, TEMP[1].xyzx 17: MUL TEMP[1], IN[1], IMM[0].ywyy 18: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 19: MOV TEMP[3].xy, TEMP[3].xyyy 20: TEX TEMP[3], TEMP[3], SAMP[0], 2D 21: MAD TEMP[3].xyz, TEMP[3], IMM[1].yyyy, IMM[0].wwww 22: MUL TEMP[4].xyz, TEMP[0], TEMP[3].yyyy 23: MOV TEMP[0].xyz, TEMP[4].xyzx 24: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[4].xyzx 26: MAD TEMP[3].xyz, TEMP[3].zzzz, IN[0], TEMP[0] 27: MOV TEMP[4].xyz, TEMP[3].xyzz 28: TEX TEMP[4], TEMP[4], SAMP[4], CUBE 29: POW TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 30: POW TEMP[5].y, TEMP[4].yyyy, IMM[1].zzzz 31: POW TEMP[5].z, TEMP[4].zzzz, IMM[1].zzzz 32: POW TEMP[5].w, TEMP[4].wwww, IMM[1].wwww 33: MOV TEMP[2].w, TEMP[5].wwww 34: MOV TEMP[3].xyz, TEMP[3].xyzz 35: TEX TEMP[3], TEMP[3], SAMP[3], CUBE 36: POW TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz 37: POW TEMP[4].y, TEMP[3].yyyy, IMM[1].zzzz 38: POW TEMP[4].z, TEMP[3].zzzz, IMM[1].zzzz 39: POW TEMP[4].w, TEMP[3].wwww, IMM[1].wwww 40: MOV TEMP[0].w, TEMP[4].wwww 41: MUL TEMP[3].xyz, TEMP[4], IMM[2].xxxx 42: MOV TEMP[0].xyz, TEMP[3].xyzx 43: MAD TEMP[3].xyz, TEMP[5], IMM[2].xxxx, -TEMP[0] 44: MOV TEMP[2].xyz, TEMP[3].xyzx 45: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 46: MOV TEMP[3].xy, TEMP[3].xyyy 47: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 48: MAD TEMP[3].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 49: MOV TEMP[0].xyz, TEMP[3].xyzx 50: ADD TEMP[1].xy, TEMP[1], IMM[0].xyxx 51: MOV TEMP[1].xy, TEMP[1].xyyy 52: TEX TEMP[1], TEMP[1], SAMP[1], 2D 53: MOV TEMP[2].w, TEMP[1].wwww 54: MAD TEMP[0].xyz, TEMP[1], TEMP[0], IMM[2].yyyy 55: ABS TEMP[1].x, TEMP[0].xxxx 56: LG2 TEMP[2].x, TEMP[1].xxxx 57: ABS TEMP[1].x, TEMP[0].yyyy 58: LG2 TEMP[1].x, TEMP[1].xxxx 59: MOV TEMP[2].y, TEMP[1].xxxx 60: ABS TEMP[0].x, TEMP[0].zzzz 61: LG2 TEMP[0].x, TEMP[0].xxxx 62: MOV TEMP[2].z, TEMP[0].xxxx 63: MUL TEMP[0].xyz, TEMP[2], IMM[2].zzzz 64: EX2 TEMP[2].x, TEMP[0].xxxx 65: EX2 TEMP[1].x, TEMP[0].yyyy 66: MOV TEMP[2].y, TEMP[1].xxxx 67: EX2 TEMP[0].x, TEMP[0].zzzz 68: MOV TEMP[2].z, TEMP[0].xxxx 69: MOV TEMP[0].xyz, TEMP[2].xyzz 70: TEX TEMP[0].xyz, TEMP[0], SAMP[5], 3D 71: MOV TEMP[0].xyz, TEMP[0].xyzx 72: MOV TEMP[0].w, IMM[0].yyyy 73: MOV OUT[0], TEMP[0] 74: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %51 = fmul float 0.000000e+00, %47 %52 = fmul float 1.000000e+00, %48 %53 = fmul float 0.000000e+00, %46 %54 = fsub float -0.000000e+00, %52 %55 = fmul float %47, 0.000000e+00 %56 = fadd float %55, %54 %57 = fsub float -0.000000e+00, %53 %58 = fmul float %48, 0.000000e+00 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %51 %61 = fmul float %46, 1.000000e+00 %62 = fadd float %61, %60 %63 = fmul float %48, -0.000000e+00 %64 = fadd float %63, %51 %65 = fmul float %46, -0.000000e+00 %66 = fadd float %65, %52 %67 = fmul float %47, -1.000000e+00 %68 = fadd float %67, %53 %69 = fmul float %56, %56 %70 = fmul float %62, %62 %71 = fadd float %69, %70 %72 = fcmp uge float %71, 0x3E7AD7F2A0000000 %73 = select i1 %72, float %71, float 0x3E7AD7F2A0000000 %74 = call float @llvm.AMDGPU.rsq.clamped.f32(float %73) %75 = fmul float %74, %56 %76 = fmul float %74, %59 %77 = fmul float %74, %62 %78 = fmul float %66, %66 %79 = fmul float %68, %68 %80 = fadd float %78, %79 %81 = fcmp uge float %80, 0x3E7AD7F2A0000000 %82 = select i1 %81, float %80, float 0x3E7AD7F2A0000000 %83 = call float @llvm.AMDGPU.rsq.clamped.f32(float %82) %84 = fmul float %83, %64 %85 = fmul float %83, %66 %86 = fmul float %83, %68 %87 = fmul float %49, 1.000000e+00 %88 = fmul float %50, -1.000000e+00 %89 = fadd float %87, 0.000000e+00 %90 = fadd float %88, 1.000000e+00 %91 = bitcast float %89 to i32 %92 = bitcast float %90 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %23 to <32 x i8> %96 = bitcast <4 x i32> %25 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = fmul float %98, 2.000000e+00 %103 = fadd float %102, -1.000000e+00 %104 = fmul float %99, 2.000000e+00 %105 = fadd float %104, -1.000000e+00 %106 = fmul float %100, 2.000000e+00 %107 = fadd float %106, -1.000000e+00 %108 = fmul float %84, %105 %109 = fmul float %85, %105 %110 = fmul float %86, %105 %111 = fmul float %103, %75 %112 = fadd float %111, %108 %113 = fmul float %103, %76 %114 = fadd float %113, %109 %115 = fmul float %103, %77 %116 = fadd float %115, %110 %117 = fmul float %107, %46 %118 = fadd float %117, %112 %119 = fmul float %107, %47 %120 = fadd float %119, %114 %121 = fmul float %107, %48 %122 = fadd float %121, %116 %123 = insertelement <4 x float> undef, float %118, i32 0 %124 = insertelement <4 x float> %123, float %120, i32 1 %125 = insertelement <4 x float> %124, float %122, i32 2 %126 = insertelement <4 x float> %125, float 0.000000e+00, i32 3 %127 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %126) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = extractelement <4 x float> %127, i32 2 %131 = extractelement <4 x float> %127, i32 3 %132 = call float @fabs(float %130) %133 = fdiv float 1.000000e+00, %132 %134 = fmul float %128, %133 %135 = fadd float %134, 1.500000e+00 %136 = fmul float %129, %133 %137 = fadd float %136, 1.500000e+00 %138 = bitcast float %137 to i32 %139 = bitcast float %135 to i32 %140 = bitcast float %131 to i32 %141 = insertelement <4 x i32> undef, i32 %138, i32 0 %142 = insertelement <4 x i32> %141, i32 %139, i32 1 %143 = insertelement <4 x i32> %142, i32 %140, i32 2 %144 = insertelement <4 x i32> %143, i32 undef, i32 3 %145 = bitcast <8 x i32> %39 to <32 x i8> %146 = bitcast <4 x i32> %41 to <16 x i8> %147 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 4) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = call float @llvm.pow.f32(float %148, float 0x40019999A0000000) %152 = call float @llvm.pow.f32(float %149, float 0x40019999A0000000) %153 = call float @llvm.pow.f32(float %150, float 0x40019999A0000000) %154 = insertelement <4 x float> undef, float %118, i32 0 %155 = insertelement <4 x float> %154, float %120, i32 1 %156 = insertelement <4 x float> %155, float %122, i32 2 %157 = insertelement <4 x float> %156, float %101, i32 3 %158 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %157) %159 = extractelement <4 x float> %158, i32 0 %160 = extractelement <4 x float> %158, i32 1 %161 = extractelement <4 x float> %158, i32 2 %162 = extractelement <4 x float> %158, i32 3 %163 = call float @fabs(float %161) %164 = fdiv float 1.000000e+00, %163 %165 = fmul float %159, %164 %166 = fadd float %165, 1.500000e+00 %167 = fmul float %160, %164 %168 = fadd float %167, 1.500000e+00 %169 = bitcast float %168 to i32 %170 = bitcast float %166 to i32 %171 = bitcast float %162 to i32 %172 = insertelement <4 x i32> undef, i32 %169, i32 0 %173 = insertelement <4 x i32> %172, i32 %170, i32 1 %174 = insertelement <4 x i32> %173, i32 %171, i32 2 %175 = insertelement <4 x i32> %174, i32 undef, i32 3 %176 = bitcast <8 x i32> %35 to <32 x i8> %177 = bitcast <4 x i32> %37 to <16 x i8> %178 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %175, <32 x i8> %176, <16 x i8> %177, i32 4) %179 = extractelement <4 x float> %178, i32 0 %180 = extractelement <4 x float> %178, i32 1 %181 = extractelement <4 x float> %178, i32 2 %182 = call float @llvm.pow.f32(float %179, float 0x40019999A0000000) %183 = call float @llvm.pow.f32(float %180, float 0x40019999A0000000) %184 = call float @llvm.pow.f32(float %181, float 0x40019999A0000000) %185 = fmul float %182, 0x3FFB333340000000 %186 = fmul float %183, 0x3FFB333340000000 %187 = fmul float %184, 0x3FFB333340000000 %188 = fsub float -0.000000e+00, %185 %189 = fmul float %151, 0x3FFB333340000000 %190 = fadd float %189, %188 %191 = fsub float -0.000000e+00, %186 %192 = fmul float %152, 0x3FFB333340000000 %193 = fadd float %192, %191 %194 = fsub float -0.000000e+00, %187 %195 = fmul float %153, 0x3FFB333340000000 %196 = fadd float %195, %194 %197 = fadd float %87, 0.000000e+00 %198 = fadd float %88, 1.000000e+00 %199 = bitcast float %197 to i32 %200 = bitcast float %198 to i32 %201 = insertelement <2 x i32> undef, i32 %199, i32 0 %202 = insertelement <2 x i32> %201, i32 %200, i32 1 %203 = bitcast <8 x i32> %31 to <32 x i8> %204 = bitcast <4 x i32> %33 to <16 x i8> %205 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %202, <32 x i8> %203, <16 x i8> %204, i32 2) %206 = extractelement <4 x float> %205, i32 0 %207 = fmul float %206, %190 %208 = fadd float %207, %185 %209 = fmul float %206, %193 %210 = fadd float %209, %186 %211 = fmul float %206, %196 %212 = fadd float %211, %187 %213 = fadd float %87, 0.000000e+00 %214 = fadd float %88, 1.000000e+00 %215 = bitcast float %213 to i32 %216 = bitcast float %214 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = bitcast <8 x i32> %27 to <32 x i8> %220 = bitcast <4 x i32> %29 to <16 x i8> %221 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %219, <16 x i8> %220, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = fmul float %222, %208 %226 = fadd float %225, 0x3F50624DE0000000 %227 = fmul float %223, %210 %228 = fadd float %227, 0x3F50624DE0000000 %229 = fmul float %224, %212 %230 = fadd float %229, 0x3F50624DE0000000 %231 = call float @fabs(float %226) %232 = call float @llvm.log2.f32(float %231) %233 = call float @fabs(float %228) %234 = call float @llvm.log2.f32(float %233) %235 = call float @fabs(float %230) %236 = call float @llvm.log2.f32(float %235) %237 = fmul float %232, 0x3FDD1743E0000000 %238 = fmul float %234, 0x3FDD1743E0000000 %239 = fmul float %236, 0x3FDD1743E0000000 %240 = call float @llvm.AMDIL.exp.(float %237) %241 = call float @llvm.AMDIL.exp.(float %238) %242 = call float @llvm.AMDIL.exp.(float %239) %243 = bitcast float %240 to i32 %244 = bitcast float %241 to i32 %245 = bitcast float %242 to i32 %246 = insertelement <4 x i32> undef, i32 %243, i32 0 %247 = insertelement <4 x i32> %246, i32 %244, i32 1 %248 = insertelement <4 x i32> %247, i32 %245, i32 2 %249 = insertelement <4 x i32> %248, i32 undef, i32 3 %250 = bitcast <8 x i32> %43 to <32 x i8> %251 = bitcast <4 x i32> %45 to <16 x i8> %252 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 3) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = call i32 @llvm.SI.packf16(float %253, float %254) %257 = bitcast i32 %256 to float %258 = call i32 @llvm.SI.packf16(float %255, float 1.000000e+00) %259 = bitcast i32 %258 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %257, float %259, float %257, float %259) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 2, 0, [m0] ; C8080200 V_INTERP_P2_F32 v2, [v2], v1, 2, 0, [m0] ; C8090201 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_MUL_F32_e32 v4, v3, v3 ; 10080703 V_MAD_F32 v4, v2, v2, v4, 0, 0 ; D2820004 04120502 V_MOV_B32_e32 v5, 1.000000e-07 ; 7E0A02FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v4, v5 ; 7C0C0B04 V_CMP_U_F32_e64 s[0:1], v4, v4, 0, 0 ; D0100000 00020904 V_CNDMASK_B32_e64 v6, 0, -1, vcc, 0, 0, 0, 0 ; D2000006 01A98280 V_CNDMASK_B32_e64 v7, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000007 00018280 V_OR_B32_e32 v6, v6, v7 ; 380C0F06 V_MOV_B32_e32 v7, 0x33d6bf95 ; 7E0E02FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v6, 0, 0, 0 ; D10A0000 00010106 V_CNDMASK_B32_e64 v4, v7, v4, s[0:1], 0, 0, 0, 0 ; D2000004 00020907 V_RSQ_CLAMP_F32_e32 v4, v4 ; 7E085904 V_MUL_F32_e32 v6, v4, v3 ; 100C0704 V_INTERP_P1_F32 v8, v0, 0, 1, [m0] ; C8200400 V_INTERP_P2_F32 v8, [v8], v1, 0, 1, [m0] ; C8210401 V_INTERP_P1_F32 v10, v0, 1, 1, [m0] ; C8280500 V_INTERP_P2_F32 v10, [v10], v1, 1, 1, [m0] ; C8290501 V_SUB_F32_e32 v9, 1.000000e+00, v10 ; 081214F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[0:3] ; F0800F00 00030A08 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v14, v11, v11 ; 061C170B V_ADD_F32_e32 v14, -1.000000e+00, v14 ; 061C1CF3 V_MUL_F32_e32 v6, v6, v14 ; 100C1D06 V_INTERP_P1_F32 v15, v0, 0, 0, [m0] ; C83C0000 V_INTERP_P2_F32 v15, [v15], v1, 0, 0, [m0] ; C83D0001 V_MUL_F32_e32 v0, v2, v2 ; 10000502 V_MAD_F32 v0, v15, v15, v0, 0, 0 ; D2820000 04021F0F V_CMP_GE_F32_e32 vcc, v0, v5 ; 7C0C0B00 V_CMP_U_F32_e64 s[0:1], v0, v0, 0, 0 ; D0100000 00020100 V_CNDMASK_B32_e64 v1, 0, -1, vcc, 0, 0, 0, 0 ; D2000001 01A98280 V_CNDMASK_B32_e64 v5, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000005 00018280 V_OR_B32_e32 v1, v1, v5 ; 38020B01 V_CMP_NE_I32_e64 s[0:1], v1, 0, 0, 0 ; D10A0000 00010101 V_CNDMASK_B32_e64 v0, v7, v0, s[0:1], 0, 0, 0, 0 ; D2000000 00020107 V_RSQ_CLAMP_F32_e32 v0, v0 ; 7E005900 V_MUL_F32_e32 v1, v0, v15 ; 10021F00 V_ADD_F32_e32 v5, v10, v10 ; 060A150A V_ADD_F32_e32 v5, -1.000000e+00, v5 ; 060A0AF3 V_MUL_F32_e32 v1, v5, v1 ; 10020305 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_ADD_F32_e32 v6, v12, v12 ; 060C190C V_ADD_F32_e32 v6, -1.000000e+00, v6 ; 060C0CF3 V_MAD_F32 v12, v6, v2, v1, 0, 0 ; D282000C 04060506 V_MUL_F32_e32 v1, v4, v2 ; 10020504 V_MUL_F32_e32 v1, v1, v14 ; 10021D01 V_MAD_F32 v11, v6, v3, v1, 0, 0 ; D282000B 04060706 V_MUL_F32_e32 v0, v0, v2 ; 10000500 V_MUL_F32_e32 v0, v5, v0 ; 10000105 V_MUL_F32_e32 v1, v6, v15 ; 10021F06 V_SUB_F32_e32 v10, v1, v0 ; 08140101 V_CUBESC_F32 v1, v10, v11, v12, 0, 0 ; D28A0001 0432170A V_CUBETC_F32 v0, v10, v11, v12, 0, 0 ; D28C0000 0432170A V_CUBEMA_F32 v2, v10, v11, v12, 0, 0 ; D28E0002 0432170A V_CUBEID_F32 v3, v10, v11, v12, 0, 0 ; D2880003 0432170A V_MOV_B32_e32 v14, 0x7fffffff ; 7E1C02FF 7FFFFFFF V_AND_B32_e32 v15, v2, v14 ; 361E1D02 V_RCP_F32_e32 v15, v15 ; 7E1E550F V_MOV_B32_e32 v16, 1.500000e+00 ; 7E2002FF 3FC00000 V_MAD_F32 v2, v0, v15, v16, 0, 0 ; D2820002 04421F00 V_MAD_F32 v1, v1, v15, v16, 0, 0 ; D2820001 04421F01 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[8:15], s[0:3] ; F0800700 00020001 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v3, v2 ; 7E064F02 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MUL_F32_e32 v4, 1.700000e+00, v3 ; 100806FF 3FD9999A V_MOV_B32_e32 v13, 0.000000e+00 ; 7E1A0280 V_CUBESC_F32 v18, v10, v11, v12, 0, 0 ; D28A0012 0432170A V_CUBETC_F32 v17, v10, v11, v12, 0, 0 ; D28C0011 0432170A V_CUBEMA_F32 v19, v10, v11, v12, 0, 0 ; D28E0013 0432170A V_CUBEID_F32 v20, v10, v11, v12, 0, 0 ; D2880014 0432170A V_AND_B32_e32 v5, v19, v14 ; 360A1D13 V_RCP_F32_e32 v5, v5 ; 7E0A5505 V_MAD_F32 v19, v17, v5, v16, 0, 0 ; D2820013 04420B11 V_MAD_F32 v18, v18, v5, v16, 0, 0 ; D2820012 04420B12 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x20 ; C0C40720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[8:15], s[0:3] ; F0800700 00020512 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v10, v7 ; 7E144F07 V_MUL_LEGACY_F32_e32 v10, 2.200000e+00, v10 ; 0E1414FF 400CCCCD V_EXP_F32_e32 v10, v10 ; 7E144B0A V_MUL_F32_e32 v10, 1.700000e+00, v10 ; 101414FF 3FD9999A V_SUB_F32_e32 v4, v10, v4 ; 0808090A S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v10, 1, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800100 00020A08 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v10, v4 ; 1008090A V_MOV_B32_e32 v11, 1.700000e+00 ; 7E1602FF 3FD9999A V_MAD_F32 v3, v3, v11, v4, 0, 0 ; D2820003 04121703 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800700 00020F08 V_MOV_B32_e32 v4, 1.000000e-03 ; 7E0802FF 3A83126F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v3, v17, v3, v4, 0, 0 ; D2820003 04120711 V_AND_B32_e32 v3, v3, v14 ; 36061D03 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v20, v3 ; 7E284B03 V_LOG_F32_e32 v3, v1 ; 7E064F01 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MUL_F32_e32 v8, 1.700000e+00, v3 ; 101006FF 3FD9999A V_LOG_F32_e32 v9, v6 ; 7E124F06 V_MUL_LEGACY_F32_e32 v9, 2.200000e+00, v9 ; 0E1212FF 400CCCCD V_EXP_F32_e32 v9, v9 ; 7E124B09 V_MUL_F32_e32 v9, 1.700000e+00, v9 ; 101212FF 3FD9999A V_SUB_F32_e32 v8, v9, v8 ; 08101109 V_MUL_F32_e32 v8, v10, v8 ; 1010110A V_MAD_F32 v3, v3, v11, v8, 0, 0 ; D2820003 04221703 V_MAD_F32 v3, v16, v3, v4, 0, 0 ; D2820003 04120710 V_AND_B32_e32 v3, v3, v14 ; 36061D03 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v19, v3 ; 7E264B03 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_MUL_F32_e32 v1, 1.700000e+00, v0 ; 100200FF 3FD9999A V_LOG_F32_e32 v2, v5 ; 7E044F05 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MUL_F32_e32 v2, 1.700000e+00, v2 ; 100404FF 3FD9999A V_SUB_F32_e32 v1, v2, v1 ; 08020302 V_MUL_F32_e32 v1, v10, v1 ; 1002030A V_MAD_F32 v0, v0, v11, v1, 0, 0 ; D2820000 04061700 V_MAD_F32 v0, v15, v0, v4, 0, 0 ; D2820000 0412010F V_AND_B32_e32 v0, v0, v14 ; 36001D00 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v18, v0 ; 7E244B00 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x28 ; C0C20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[4:11], s[0:3] ; F0800700 00010012 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v3, v0, v1 ; 5E060300 V_CVT_PKRTZ_F16_F32_e64 v0, v2, 1.000000e+00, 0, 0 ; D25E0000 0001E502 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: LRP TEMP[0].xy, IN[0], CONST[5].zwzw, CONST[5] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 3: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 4: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[2] 5: MAD TEMP[0], CONST[6].xxxx, CONST[2], TEMP[0] 6: ADD TEMP[0], TEMP[0], CONST[3] 7: MAD TEMP[1].xy, IN[1], CONST[4], CONST[4].zwzw 8: MOV TEMP[1].xy, TEMP[1].xyxx 9: MOV TEMP[1].zw, IMM[0].yyyy 10: MOV OUT[1], TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = call float @llvm.AMDGPU.lrp(float %42, float %35, float %33) %51 = call float @llvm.AMDGPU.lrp(float %43, float %36, float %34) %52 = fadd float %50, 1.000000e+00 %53 = fadd float %51, 1.000000e+00 %54 = fmul float %53, %17 %55 = fmul float %53, %18 %56 = fmul float %53, %19 %57 = fmul float %53, %20 %58 = fmul float %52, %13 %59 = fadd float %58, %54 %60 = fmul float %52, %14 %61 = fadd float %60, %55 %62 = fmul float %52, %15 %63 = fadd float %62, %56 %64 = fmul float %52, %16 %65 = fadd float %64, %57 %66 = fmul float %37, %21 %67 = fadd float %66, %59 %68 = fmul float %37, %22 %69 = fadd float %68, %61 %70 = fmul float %37, %23 %71 = fadd float %70, %63 %72 = fmul float %37, %24 %73 = fadd float %72, %65 %74 = fadd float %67, %25 %75 = fadd float %69, %26 %76 = fadd float %71, %27 %77 = fadd float %73, %28 %78 = fmul float %48, %29 %79 = fadd float %78, %31 %80 = fmul float %49, %30 %81 = fadd float %80, %32 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %81, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %75, float %76, float %77) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 0.000000e+00 ; 7E040280 EXP 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v4, 1.000000e+00, v1 ; 080802F2 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v4 ; 10080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v1, v5, v4, 0, 0 ; D2820004 04120B01 V_ADD_F32_e32 v4, 1.000000e+00, v4 ; 060808F2 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v4 ; 100A0804 V_SUB_F32_e32 v6, 1.000000e+00, v0 ; 080C00F2 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v6 ; 100C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v0, v0, v7, v6, 0, 0 ; D2820000 041A0F00 V_ADD_F32_e32 v0, 1.000000e+00, v0 ; 060000F2 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v5, 0, 0 ; D2820001 04140900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xb ; C202810B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_MAD_F32 v1, s4, v2, v1, 0, 0 ; D2820001 04060404 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xf ; C202810F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s5, v1 ; 06020205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x6 ; C2028106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s5, v4 ; 10040805 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x2 ; C2028102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s5, v2, 0, 0 ; D2820002 04080B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xa ; C202810A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s5 ; 7E060205 V_MAD_F32 v2, s4, v3, v2, 0, 0 ; D2820002 040A0604 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xe ; C202810E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s5, v2 ; 06040405 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x5 ; C2028105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s5, v4 ; 10060805 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x1 ; C2028101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s5, v3, 0, 0 ; D2820003 040C0B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x9 ; C2028109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v3, s4, v5, v3, 0, 0 ; D2820003 040E0A04 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xd ; C202810D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s5, v3 ; 06060605 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x4 ; C2028104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s5, v4 ; 10080805 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x0 ; C2028100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s5, v4, 0, 0 ; D2820000 04100B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x8 ; C2028108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s5 ; 7E080205 V_MAD_F32 v0, s4, v4, v0, 0, 0 ; D2820000 04020804 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 2.0000, -0.9999} IMM[1] FLT32 { 0.0000, -1.0000, -2.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].xy, IN[0].xyyy 7: TEX TEMP[0].xy, TEMP[0], SAMP[1], 2D 8: MAD TEMP[2].z, TEMP[0].yyyy, IMM[0].zzzz, IMM[0].wwww 9: MAD TEMP[3], TEMP[0].xxxy, IMM[0].yyyz, IMM[1].xxxy 10: MAD TEMP[0], TEMP[0].xxxy, IMM[1].yyyz, IMM[0].yyyy 11: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[1].xxxx 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[4].x, TEMP[3].xxxx 14: ELSE :0 15: MOV TEMP[4].x, TEMP[0].xxxx 16: ENDIF 17: MOV TEMP[4].x, TEMP[4].xxxx 18: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 19: UIF TEMP[5].xxxx :0 20: MOV TEMP[5].x, TEMP[3].yyyy 21: ELSE :0 22: MOV TEMP[5].x, TEMP[0].yyyy 23: ENDIF 24: MOV TEMP[4].y, TEMP[5].xxxx 25: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 26: UIF TEMP[5].xxxx :0 27: MOV TEMP[5].x, TEMP[3].zzzz 28: ELSE :0 29: MOV TEMP[5].x, TEMP[0].zzzz 30: ENDIF 31: MOV TEMP[4].z, TEMP[5].xxxx 32: FSGE TEMP[2].x, TEMP[2].zzzz, IMM[1].xxxx 33: UIF TEMP[2].xxxx :0 34: MOV TEMP[2].x, TEMP[3].wwww 35: ELSE :0 36: MOV TEMP[2].x, TEMP[0].wwww 37: ENDIF 38: MOV TEMP[4].w, TEMP[2].xxxx 39: MOV OUT[1], TEMP[4] 40: MOV OUT[0], TEMP[1] 41: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = bitcast float %30 to i32 %48 = bitcast float %31 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = bitcast <8 x i32> %27 to <32 x i8> %52 = bitcast <4 x i32> %29 to <16 x i8> %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float %55, 2.000000e+00 %57 = fadd float %56, 0xBFEFFF2E40000000 %58 = fmul float %54, 1.000000e+00 %59 = fadd float %58, 0.000000e+00 %60 = fmul float %54, 1.000000e+00 %61 = fadd float %60, 0.000000e+00 %62 = fmul float %54, 1.000000e+00 %63 = fadd float %62, 0.000000e+00 %64 = fmul float %55, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %54, -1.000000e+00 %67 = fadd float %66, 1.000000e+00 %68 = fmul float %54, -1.000000e+00 %69 = fadd float %68, 1.000000e+00 %70 = fmul float %54, -1.000000e+00 %71 = fadd float %70, 1.000000e+00 %72 = fmul float %55, -2.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = fcmp oge float %57, 0.000000e+00 %75 = sext i1 %74 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = icmp ne i32 %77, 0 %. = select i1 %78, float %59, float %67 %79 = fcmp oge float %57, 0.000000e+00 %80 = sext i1 %79 to i32 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 %temp20.0 = select i1 %83, float %61, float %69 %84 = fcmp oge float %57, 0.000000e+00 %85 = sext i1 %84 to i32 %86 = bitcast i32 %85 to float %87 = bitcast float %86 to i32 %88 = icmp ne i32 %87, 0 %.33 = select i1 %88, float %63, float %71 %89 = fcmp oge float %57, 0.000000e+00 %90 = sext i1 %89 to i32 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = icmp ne i32 %92, 0 %temp8.0 = select i1 %93, float %65, float %73 %94 = call i32 @llvm.SI.packf16(float %43, float %44) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %45, float %46) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %., float %temp20.0) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %.33, float %temp8.0) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %95, float %97, float %95, float %97) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x0 ; C0C40700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020402 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v0, v7 ; 7E004F07 V_MUL_LEGACY_F32_e32 v0, 1.000000e+00, v0 ; 0E0000F2 V_EXP_F32_e32 v0, v0 ; 7E004B00 V_LOG_F32_e32 v1, v6 ; 7E024F06 V_MUL_LEGACY_F32_e32 v1, 2.200000e+00, v1 ; 0E0202FF 400CCCCD V_EXP_F32_e32 v1, v1 ; 7E024B01 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_LOG_F32_e32 v1, v5 ; 7E024F05 V_MUL_LEGACY_F32_e32 v1, 2.200000e+00, v1 ; 0E0202FF 400CCCCD V_EXP_F32_e32 v1, v1 ; 7E024B01 V_LOG_F32_e32 v4, v4 ; 7E084F04 V_MUL_LEGACY_F32_e32 v4, 2.200000e+00, v4 ; 0E0808FF 400CCCCD V_EXP_F32_e32 v4, v4 ; 7E084B04 V_CVT_PKRTZ_F16_F32_e32 v1, v4, v1 ; 5E020304 EXP 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x8 ; C0C20708 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F IMAGE_SAMPLE v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800300 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v1, -2.000000e+00, 1.000000e+00, 0, 0 ; D2820002 03C9EB01 V_ADD_F32_e32 v3, v1, v1 ; 06060301 V_ADD_F32_e32 v4, -1.000000e+00, v3 ; 060806F3 V_ADD_F32_e32 v3, -9.999000e-01, v3 ; 060606FF BF7FF972 V_CMP_GE_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D00C0000 00010103 V_CNDMASK_B32_e64 v2, v2, v4, s[0:1], 0, 0, 0, 0 ; D2000002 00020902 V_SUB_F32_e32 v3, 1.000000e+00, v0 ; 080600F2 V_CNDMASK_B32_e64 v0, v3, v0, s[0:1], 0, 0, 0, 0 ; D2000000 00020103 V_CVT_PKRTZ_F16_F32_e32 v1, v0, v2 ; 5E020500 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v0 ; 5E000100 EXP 15, 1, 1, 1, 1, v0, v1, v0, v1 ; F8001C1F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[2].xyxx 1: ADD TEMP[1].xy, -TEMP[0], CONST[3] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, IN[0], TEMP[0], CONST[2] 4: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 5: MOV TEMP[0].xy, TEMP[0].xyxx 6: MOV TEMP[1].xy, TEMP[1].zwzz 7: MOV TEMP[1].zw, IMM[0].yyxy 8: MOV TEMP[0].zw, IMM[0].yyxy 9: MOV OUT[1], TEMP[0] 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = fsub float -0.000000e+00, %17 %28 = fadd float %27, %19 %29 = fsub float -0.000000e+00, %18 %30 = fadd float %29, %20 %31 = fmul float %25, %28 %32 = fadd float %31, %17 %33 = fmul float %26, %30 %34 = fadd float %33, %18 %35 = fsub float -0.000000e+00, %13 %36 = fmul float %32, %15 %37 = fadd float %36, %35 %38 = fsub float -0.000000e+00, %14 %39 = fmul float %34, %16 %40 = fadd float %39, %38 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %32, float %34, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %37, float %40, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_BUFFER_LOAD_DWORD s5, s[0:3], 0x9 ; C2028109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_SUB_F32_e32 v2, s4, v1 ; 08040204 V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[3:6], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010300 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v4, v2, v1, 0, 0 ; D2820000 04060504 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_BUFFER_LOAD_DWORD s5, s[0:3], 0x8 ; C2028108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_SUB_F32_e32 v2, s4, v1 ; 08040204 V_MAD_F32 v1, v3, v2, v1, 0, 0 ; D2820001 04060503 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s4, v0 ; 0A000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s0, v1 ; 0A020200 EXP 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800700 00010004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v3, v1 ; 7E064F01 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MOV_B32_e32 v4, 0x7fffffff ; 7E0802FF 7FFFFFFF V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_LOG_F32_e32 v5, v0 ; 7E0A4F00 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v4 ; 360A0905 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v3, v5, v3 ; 5E060705 V_LOG_F32_e32 v0, v2 ; 7E004F02 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v4 ; 36000900 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e64 v0, v0, 1.000000e+00, 0, 0 ; D25E0000 0001E500 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e32 v6, v1, v6 ; 100C0D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v6, s4, v6 ; 0A0C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v4, v3 ; 7E084F03 V_MUL_LEGACY_F32_e32 v4, 1.000000e+00, v4 ; 0E0808F2 V_EXP_F32_e32 v4, v4 ; 7E084B04 V_LOG_F32_e32 v5, v2 ; 7E0A4F02 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_MOV_B32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v4, v5, v4 ; 5E080905 V_LOG_F32_e32 v5, v1 ; 7E0A4F01 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v6 ; 36000D00 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v5 ; 5E000B00 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e32 v6, v1, v6 ; 100C0D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v6, s4, v6 ; 0A0C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v4, v3 ; 7E084F03 V_MUL_LEGACY_F32_e32 v4, 1.000000e+00, v4 ; 0E0808F2 V_EXP_F32_e32 v4, v4 ; 7E084B04 V_LOG_F32_e32 v5, v2 ; 7E0A4F02 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_MOV_B32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v4, v5, v4 ; 5E080905 V_LOG_F32_e32 v5, v1 ; 7E0A4F01 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v6 ; 36000D00 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v5 ; 5E000B00 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e32 v6, v1, v6 ; 100C0D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v6, s4, v6 ; 0A0C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[1].xxxx, CONST[1].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: MUL TEMP[0].xyz, TEMP[1], CONST[0] 8: ABS TEMP[1].x, TEMP[0].xxxx 9: LG2 TEMP[1].x, TEMP[1].xxxx 10: ABS TEMP[2].x, TEMP[0].yyyy 11: LG2 TEMP[2].x, TEMP[2].xxxx 12: MOV TEMP[1].y, TEMP[2].xxxx 13: ABS TEMP[0].x, TEMP[0].zzzz 14: LG2 TEMP[0].x, TEMP[0].xxxx 15: MOV TEMP[1].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV TEMP[1].w, IMM[0].yyyy 23: MOV OUT[0], TEMP[1] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fmul float %33, %27 %36 = fadd float %35, %28 %37 = fmul float %34, %27 %38 = fadd float %37, %28 %39 = bitcast float %36 to i32 %40 = bitcast float %38 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = bitcast <8 x i32> %30 to <32 x i8> %44 = bitcast <4 x i32> %32 to <16 x i8> %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %43, <16 x i8> %44, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = call float @llvm.pow.f32(float %46, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %47, float 0x40019999A0000000) %51 = call float @llvm.pow.f32(float %48, float 0x40019999A0000000) %52 = fmul float %49, %24 %53 = fmul float %50, %25 %54 = fmul float %51, %26 %55 = call float @fabs(float %52) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %53) %58 = call float @llvm.log2.f32(float %57) %59 = call float @fabs(float %54) %60 = call float @llvm.log2.f32(float %59) %61 = fmul float %56, 0x3FDD1743E0000000 %62 = fmul float %58, 0x3FDD1743E0000000 %63 = fmul float %60, 0x3FDD1743E0000000 %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call float @llvm.AMDIL.exp.(float %62) %66 = call float @llvm.AMDIL.exp.(float %63) %67 = call i32 @llvm.SI.packf16(float %64, float %65) %68 = bitcast i32 %67 to float %69 = call i32 @llvm.SI.packf16(float %66, float 1.000000e+00) %70 = bitcast i32 %69 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %68, float %70, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x4 ; C2040104 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x5 ; C2050105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s10 ; 7E06020A V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800700 00430004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v3, v1 ; 7E064F01 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v3 ; 10060604 V_MOV_B32_e32 v4, 0x7fffffff ; 7E0802FF 7FFFFFFF V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_LOG_F32_e32 v5, v0 ; 7E0A4F00 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 V_AND_B32_e32 v5, v5, v4 ; 360A0905 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v3, v5, v3 ; 5E060705 V_LOG_F32_e32 v0, v2 ; 7E004F02 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x2 ; C2000102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s0, v0 ; 10000000 V_AND_B32_e32 v0, v0, v4 ; 36000900 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e64 v0, v0, 1.000000e+00, 0, 0 ; D25E0000 0001E500 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..4] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} 0: MAD TEMP[0].x, IN[0].xxxx, IMM[0].xxxx, IMM[0].xxxx 1: MOV TEMP[1].xy, CONST[3].xyxx 2: ADD TEMP[1].yz, -TEMP[1].xxyw, CONST[4].xxyw 3: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[1].yyyy, CONST[3].xxxx 4: MOV TEMP[0].x, TEMP[2].xxxx 5: MAD TEMP[2].w, IN[0].yyyy, IMM[0].xxxx, IMM[0].xxxx 6: MOV TEMP[0].w, TEMP[2].wwww 7: MAD TEMP[1].z, TEMP[2].wwww, TEMP[1].zzzz, CONST[3].yyyy 8: MOV TEMP[0].z, TEMP[1].zzzz 9: ADD TEMP[0].xy, TEMP[0].xzzw, CONST[2] 10: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 11: MOV TEMP[0].xy, TEMP[0].xyxx 12: MOV TEMP[1].xy, TEMP[1].zwzz 13: MOV TEMP[1].zw, IMM[0].zzyz 14: MOV TEMP[0].zw, IMM[0].zzyz 15: MOV OUT[1], TEMP[0] 16: MOV OUT[0], TEMP[1] 17: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = fmul float %27, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fsub float -0.000000e+00, %19 %32 = fadd float %31, %21 %33 = fsub float -0.000000e+00, %20 %34 = fadd float %33, %22 %35 = fmul float %30, %32 %36 = fadd float %35, %19 %37 = fmul float %28, 5.000000e-01 %38 = fadd float %37, 5.000000e-01 %39 = fmul float %38, %34 %40 = fadd float %39, %20 %41 = fadd float %36, %17 %42 = fadd float %40, %18 %43 = fsub float -0.000000e+00, %13 %44 = fmul float %41, %15 %45 = fadd float %44, %43 %46 = fsub float -0.000000e+00, %14 %47 = fmul float %42, %16 %48 = fadd float %47, %46 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %45, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xd ; C202810D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_SUB_F32_e32 v1, s4, v1 ; 08020204 V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[8:11], s[8:9], 0x0 ; C0840900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[2:5], s[8:11][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80020200 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v3, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820000 03C1E103 V_MAD_F32 v0, v0, v1, s5, 0, 0 ; D2820000 00160300 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xc ; C202810C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_SUB_F32_e32 v1, s4, v1 ; 08020204 V_MAD_F32 v2, v2, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820002 03C1E102 V_MAD_F32 v1, v2, v1, s5, 0, 0 ; D2820001 00160302 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s4, v0 ; 0A000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s0, v1 ; 0A020200 EXP 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800700 00010004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v3, v1 ; 7E064F01 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MOV_B32_e32 v4, 0x7fffffff ; 7E0802FF 7FFFFFFF V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_LOG_F32_e32 v5, v0 ; 7E0A4F00 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v4 ; 360A0905 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v3, v5, v3 ; 5E060705 V_LOG_F32_e32 v0, v2 ; 7E004F02 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v4 ; 36000900 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e64 v0, v0, 1.000000e+00, 0, 0 ; D25E0000 0001E500 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e32 v6, v1, v6 ; 100C0D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v6, s4, v6 ; 0A0C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_MAD_F32 v5, v2, s8, v3, 0, 0 ; D2820005 040C1102 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v4, v2, s8, v3, 0, 0 ; D2820004 040C1102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010004 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v4, v3 ; 7E084F03 V_MUL_LEGACY_F32_e32 v4, 1.000000e+00, v4 ; 0E0808F2 V_EXP_F32_e32 v4, v4 ; 7E084B04 V_LOG_F32_e32 v5, v2 ; 7E0A4F02 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_MOV_B32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v4, v5, v4 ; 5E080905 V_LOG_F32_e32 v5, v1 ; 7E0A4F01 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v6 ; 360A0D05 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v6 ; 36000D00 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v5 ; 5E000B00 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 0x7fffffff ; 7E0A02FF 7FFFFFFF S_WAITCNT vmcnt(0) ; BF8C0770 V_AND_B32_e32 v6, v3, v5 ; 360C0B03 V_LOG_F32_e32 v6, v6 ; 7E0C4F06 V_MUL_F32_e32 v6, 2.200000e+00, v6 ; 100C0CFF 400CCCCD V_EXP_F32_e32 v6, v6 ; 7E0C4B06 V_AND_B32_e32 v7, v2, v5 ; 360E0B02 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 2.200000e+00, v7 ; 100E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_AND_B32_e32 v5, v1, v5 ; 360A0B01 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 2.200000e+00, v5 ; 100A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 EXP 15, 32, 0, 0, 0, v5, v7, v6, v4 ; F800020F 04060705 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 V_MUL_F32_e32 v4, v1, v4 ; 10080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v4, s4, v4 ; 0A080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 2.0000} IMM[1] FLT32 { -2.0000, 3.0000, 1.0000, 0.4545} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[1] 1: DP2 TEMP[0].x, TEMP[0].xyyy, TEMP[0].xyyy 2: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy 3: RSQ TEMP[2].x, TEMP[1].xxxx 4: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx 5: CMP TEMP[0].x, -TEMP[1].xxxx, TEMP[2].xxxx, IMM[0].zzzz 6: MOV TEMP[1].x, -CONST[0] 7: MAD TEMP[2].x, TEMP[0].xxxx, IMM[0].wwww, TEMP[1].xxxx 8: ADD TEMP[1].y, TEMP[1].xxxx, CONST[0].yyyy 9: RCP TEMP[1].x, TEMP[1].yyyy 10: MUL TEMP[1], TEMP[1].xxxx, TEMP[2].xxxx 11: MOV_SAT TEMP[1], TEMP[1] 12: MAD TEMP[2].y, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 14: MAD TEMP[1].x, TEMP[2].yyyy, -TEMP[1].xxxx, IMM[1].zzzz 15: MUL TEMP[1].w, TEMP[1].xxxx, IN[0].wwww 16: MOV TEMP[1].w, TEMP[1].wwww 17: ABS TEMP[2].x, IN[0].xxxx 18: LG2 TEMP[0].x, TEMP[2].xxxx 19: ABS TEMP[2].x, IN[0].yyyy 20: LG2 TEMP[2].x, TEMP[2].xxxx 21: MOV TEMP[0].y, TEMP[2].xxxx 22: ABS TEMP[2].x, IN[0].zzzz 23: LG2 TEMP[2].x, TEMP[2].xxxx 24: MOV TEMP[0].z, TEMP[2].xxxx 25: MUL TEMP[0].xyz, TEMP[0], IMM[1].wwww 26: EX2 TEMP[1].x, TEMP[0].xxxx 27: EX2 TEMP[2].x, TEMP[0].yyyy 28: MOV TEMP[1].y, TEMP[2].xxxx 29: EX2 TEMP[0].x, TEMP[0].zzzz 30: MOV TEMP[1].z, TEMP[0].xxxx 31: MOV OUT[0], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = fadd float -5.000000e-01, %30 %33 = fadd float -5.000000e-01, %31 %34 = fmul float %32, %32 %35 = fmul float %33, %33 %36 = fadd float %34, %35 %37 = fcmp uge float %36, 0x3E7AD7F2A0000000 %38 = select i1 %37, float %36, float 0x3E7AD7F2A0000000 %39 = call float @llvm.AMDGPU.rsq.clamped.f32(float %38) %40 = fmul float %39, %38 %41 = fsub float -0.000000e+00, %38 %42 = call float @llvm.AMDGPU.cndlt(float %41, float %40, float 0.000000e+00) %43 = fsub float -0.000000e+00, %24 %44 = fmul float %42, 2.000000e+00 %45 = fadd float %44, %43 %46 = fadd float %43, %25 %47 = fdiv float 1.000000e+00, %46 %48 = fmul float %47, %45 %49 = fmul float %47, %45 %50 = fmul float %47, %45 %51 = fmul float %47, %45 %52 = call float @llvm.AMDIL.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %53 = call float @llvm.AMDIL.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) %54 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %55 = call float @llvm.AMDIL.clamp.(float %51, float 0.000000e+00, float 1.000000e+00) %56 = fmul float %52, -2.000000e+00 %57 = fadd float %56, 3.000000e+00 %58 = fmul float %52, %52 %59 = fsub float -0.000000e+00, %58 %60 = fmul float %57, %59 %61 = fadd float %60, 1.000000e+00 %62 = fmul float %61, %29 %63 = call float @fabs(float %26) %64 = call float @llvm.log2.f32(float %63) %65 = call float @fabs(float %27) %66 = call float @llvm.log2.f32(float %65) %67 = call float @fabs(float %28) %68 = call float @llvm.log2.f32(float %67) %69 = fmul float %64, 0x3FDD1743E0000000 %70 = fmul float %66, 0x3FDD1743E0000000 %71 = fmul float %68, 0x3FDD1743E0000000 %72 = call float @llvm.AMDIL.exp.(float %69) %73 = call float @llvm.AMDIL.exp.(float %70) %74 = call float @llvm.AMDIL.exp.(float %71) %75 = call i32 @llvm.SI.packf16(float %72, float %73) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float %74, float %62) %78 = bitcast i32 %77 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { readonly } attributes #4 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 V_ADD_F32_e32 v2, -5.000000e-01, v2 ; 060404F1 V_INTERP_P1_F32 v3, v0, 1, 1, [m0] ; C80C0500 V_INTERP_P2_F32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 V_ADD_F32_e32 v3, -5.000000e-01, v3 ; 060606F1 V_MUL_F32_e32 v3, v3, v3 ; 10060703 V_MAD_F32 v2, v2, v2, v3, 0, 0 ; D2820002 040E0502 V_MOV_B32_e32 v3, 1.000000e-07 ; 7E0602FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v3, 0, -1, vcc, 0, 0, 0, 0 ; D2000003 01A98280 V_CNDMASK_B32_e64 v4, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000004 00018280 V_OR_B32_e32 v3, v3, v4 ; 38060903 V_MOV_B32_e32 v4, 0x33d6bf95 ; 7E0802FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_CNDMASK_B32_e64 v2, v4, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020504 V_RSQ_CLAMP_F32_e32 v3, v2 ; 7E065902 V_MUL_F32_e32 v3, v3, v2 ; 10060503 V_MOV_B32_e32 v4, 0x80000000 ; 7E0802FF 80000000 V_XOR_B32_e32 v2, v2, v4 ; 3A040902 V_CMP_GT_F32_e32 vcc, 0, v2 ; 7C080480 V_CNDMASK_B32_e64 v2, 0.000000e+00, v3, vcc, 0, 0, 0, 0 ; D2000002 01AA0680 V_ADD_F32_e32 v2, v2, v2 ; 06040502 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v2, s4, v2 ; 08040404 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1 ; C2000101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s0 ; 7E060200 V_SUB_F32_e32 v3, s4, v3 ; 08060604 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MUL_F32_e32 v2, v3, v2 ; 10040503 V_ADD_F32_e64 v2, v2, 0, 1, 0 ; D2060802 00010102 V_MUL_F32_e32 v3, v2, v2 ; 10060502 V_ADD_F32_e32 v2, v2, v2 ; 06040502 V_SUBREV_F32_e32 v2, 3.000000e+00, v2 ; 0A0404FF 40400000 V_MAD_F32 v2, v2, v3, 1.000000e+00, 0, 0 ; D2820002 03CA0702 V_INTERP_P1_F32 v3, v0, 3, 0, [m0] ; C80C0300 V_INTERP_P2_F32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 V_MUL_F32_e32 v2, v2, v3 ; 10040702 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_MOV_B32_e32 v4, 0x7fffffff ; 7E0802FF 7FFFFFFF V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_CVT_PKRTZ_F16_F32_e32 v2, v3, v2 ; 5E040503 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 V_AND_B32_e32 v0, v5, v4 ; 36000905 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v3 ; 5E000700 EXP 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 0x7fffffff ; 7E0A02FF 7FFFFFFF S_WAITCNT vmcnt(0) ; BF8C0770 V_AND_B32_e32 v6, v3, v5 ; 360C0B03 V_LOG_F32_e32 v6, v6 ; 7E0C4F06 V_MUL_F32_e32 v6, 2.200000e+00, v6 ; 100C0CFF 400CCCCD V_EXP_F32_e32 v6, v6 ; 7E0C4B06 V_AND_B32_e32 v7, v2, v5 ; 360E0B02 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 2.200000e+00, v7 ; 100E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_AND_B32_e32 v5, v1, v5 ; 360A0B01 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 2.200000e+00, v5 ; 100A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 EXP 15, 32, 0, 0, 0, v5, v7, v6, v4 ; F800020F 04060705 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 V_MUL_F32_e32 v4, v1, v4 ; 10080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v4, s4, v4 ; 0A080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MUL TEMP[0].w, TEMP[1].wwww, IN[0].wwww 7: MOV TEMP[0].w, TEMP[0].wwww 8: MUL TEMP[1].xyz, TEMP[1], IN[0] 9: ABS TEMP[2].x, TEMP[1].xxxx 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: ABS TEMP[3].x, TEMP[1].yyyy 12: LG2 TEMP[3].x, TEMP[3].xxxx 13: MOV TEMP[2].y, TEMP[3].xxxx 14: ABS TEMP[1].x, TEMP[1].zzzz 15: LG2 TEMP[1].x, TEMP[1].xxxx 16: MOV TEMP[2].z, TEMP[1].xxxx 17: MUL TEMP[1].xyz, TEMP[2], IMM[0].zzzz 18: EX2 TEMP[0].x, TEMP[1].xxxx 19: EX2 TEMP[2].x, TEMP[1].yyyy 20: MOV TEMP[0].y, TEMP[2].xxxx 21: EX2 TEMP[1].x, TEMP[1].zzzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = fmul float %46, %29 %48 = fmul float %43, %26 %49 = fmul float %44, %27 %50 = fmul float %45, %28 %51 = call float @fabs(float %48) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %49) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %50) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %47) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 1, [m0] ; C80C0500 V_INTERP_P2_F32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030202 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v6, v5 ; 7E0C4F05 V_MUL_LEGACY_F32_e32 v6, 1.000000e+00, v6 ; 0E0C0CF2 V_EXP_F32_e32 v6, v6 ; 7E0C4B06 V_INTERP_P1_F32 v7, v0, 3, 0, [m0] ; C81C0300 V_INTERP_P2_F32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 V_MUL_F32_e32 v6, v6, v7 ; 100C0F06 V_LOG_F32_e32 v7, v4 ; 7E0E4F04 V_MUL_LEGACY_F32_e32 v7, 2.200000e+00, v7 ; 0E0E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_INTERP_P1_F32 v8, v0, 2, 0, [m0] ; C8200200 V_INTERP_P2_F32 v8, [v8], v1, 2, 0, [m0] ; C8210201 V_MUL_F32_e32 v7, v7, v8 ; 100E1107 V_MOV_B32_e32 v8, 0x7fffffff ; 7E1002FF 7FFFFFFF V_AND_B32_e32 v7, v7, v8 ; 360E1107 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 4.545450e-01, v7 ; 100E0EFF 3EE8BA1F V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_CVT_PKRTZ_F16_F32_e32 v6, v7, v6 ; 5E0C0D07 V_LOG_F32_e32 v7, v3 ; 7E0E4F03 V_MUL_LEGACY_F32_e32 v7, 2.200000e+00, v7 ; 0E0E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_INTERP_P1_F32 v9, v0, 1, 0, [m0] ; C8240100 V_INTERP_P2_F32 v9, [v9], v1, 1, 0, [m0] ; C8250101 V_MUL_F32_e32 v7, v7, v9 ; 100E1307 V_AND_B32_e32 v7, v7, v8 ; 360E1107 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 4.545450e-01, v7 ; 100E0EFF 3EE8BA1F V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_INTERP_P1_F32 v3, v0, 0, 0, [m0] ; C80C0000 V_INTERP_P2_F32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 V_MUL_F32_e32 v0, v2, v3 ; 10000702 V_AND_B32_e32 v0, v0, v8 ; 36001100 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v7 ; 5E000F00 EXP 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[1].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, TEMP[0], IMM[0], IMM[0].xxxx 4: MOV TEMP[0].xy, TEMP[0].xyxx 5: MOV TEMP[1].xy, TEMP[1].xyxx 6: MOV TEMP[1].zw, IMM[0].wwzw 7: MOV TEMP[0].zw, IMM[0].wwzw 8: MOV OUT[0], TEMP[1] 9: MOV OUT[1], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 %29 = fmul float %25, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fmul float %28, -5.000000e-01 %32 = fadd float %31, 5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %32, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 V_MUL_F32_e32 v4, v1, v4 ; 10080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v4, s4, v4 ; 0A080804 V_MAD_F32 v5, v4, -5.000000e-01, 5.000000e-01, 0, 0 ; D2820005 03C1E304 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e32 v0, v0, v6 ; 10000D00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 V_MAD_F32 v1, v0, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820001 03C1E100 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 32, 0, 0, 0, v1, v5, v3, v2 ; F800020F 02030501 EXP 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: ABS TEMP[0].x, TEMP[1].xxxx 6: LG2 TEMP[0].x, TEMP[0].xxxx 7: ABS TEMP[2].x, TEMP[1].yyyy 8: LG2 TEMP[2].x, TEMP[2].xxxx 9: MOV TEMP[0].y, TEMP[2].xxxx 10: ABS TEMP[1].x, TEMP[1].zzzz 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: MOV TEMP[0].z, TEMP[1].xxxx 13: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 14: EX2 TEMP[1].x, TEMP[0].xxxx 15: EX2 TEMP[2].x, TEMP[0].yyyy 16: MOV TEMP[1].y, TEMP[2].xxxx 17: EX2 TEMP[0].x, TEMP[0].zzzz 18: MOV TEMP[1].z, TEMP[0].xxxx 19: MOV TEMP[1].w, CONST[0].xxxx 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = call float @llvm.pow.f32(float %38, float 0x40019999A0000000) %42 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %43 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %44 = call float @fabs(float %41) %45 = call float @llvm.log2.f32(float %44) %46 = call float @fabs(float %42) %47 = call float @llvm.log2.f32(float %46) %48 = call float @fabs(float %43) %49 = call float @llvm.log2.f32(float %48) %50 = fmul float %45, 0x3FDD1743E0000000 %51 = fmul float %47, 0x3FDD1743E0000000 %52 = fmul float %49, 0x3FDD1743E0000000 %53 = call float @llvm.AMDIL.exp.(float %50) %54 = call float @llvm.AMDIL.exp.(float %51) %55 = call float @llvm.AMDIL.exp.(float %52) %56 = call i32 @llvm.SI.packf16(float %53, float %54) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %55, float %24) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v3, v1 ; 7E064F01 V_MUL_LEGACY_F32_e32 v3, 2.200000e+00, v3 ; 0E0606FF 400CCCCD V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MOV_B32_e32 v4, 0x7fffffff ; 7E0802FF 7FFFFFFF V_AND_B32_e32 v3, v3, v4 ; 36060903 V_LOG_F32_e32 v3, v3 ; 7E064F03 V_MUL_F32_e32 v3, 4.545450e-01, v3 ; 100606FF 3EE8BA1F V_EXP_F32_e32 v3, v3 ; 7E064B03 V_LOG_F32_e32 v5, v0 ; 7E0A4F00 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_AND_B32_e32 v5, v5, v4 ; 360A0905 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v3, v5, v3 ; 5E060705 V_LOG_F32_e32 v0, v2 ; 7E004F02 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v0, v0 ; 7E004B00 V_AND_B32_e32 v0, v0, v4 ; 36000900 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CVT_PKRTZ_F16_F32_e64 v0, v0, s0, 0, 0 ; D25E0000 00000100 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].x, IN[1].xxxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = fsub float -0.000000e+00, %13 %29 = fmul float %21, %15 %30 = fadd float %29, %28 %31 = fsub float -0.000000e+00, %14 %32 = fmul float %22, %16 %33 = fadd float %32, %31 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %30, float %33, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v6, v6, v5 ; F800020F 05060601 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 V_MUL_F32_e32 v4, v1, v4 ; 10080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v4, s4, v4 ; 0A080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v0 ; 0A000000 EXP 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MAX TEMP[0].x, IN[0].xxxx, IMM[0].xxxx 1: RSQ TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 3: CMP TEMP[1].x, -TEMP[0].xxxx, TEMP[1].xxxx, IMM[0].yyyy 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV TEMP[0].xyz, IN[0].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fcmp uge float %22, 0x3E7AD7F2A0000000 %24 = select i1 %23, float %22, float 0x3E7AD7F2A0000000 %25 = call float @llvm.AMDGPU.rsq.clamped.f32(float %24) %26 = fmul float %25, %24 %27 = fsub float -0.000000e+00, %24 %28 = call float @llvm.AMDGPU.cndlt(float %27, float %26, float 0.000000e+00) %29 = call i32 @llvm.SI.packf16(float %22, float %22) %30 = bitcast i32 %29 to float %31 = call i32 @llvm.SI.packf16(float %22, float %28) %32 = bitcast i32 %31 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %30, float %32, float %30, float %32) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MOV_B32_e32 v0, 1.000000e-07 ; 7E0002FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v2, v0 ; 7C0C0102 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v0, 0, -1, vcc, 0, 0, 0, 0 ; D2000000 01A98280 V_CNDMASK_B32_e64 v1, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000001 00018280 V_OR_B32_e32 v0, v0, v1 ; 38000300 V_MOV_B32_e32 v1, 0x33d6bf95 ; 7E0202FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v0, 0, 0, 0 ; D10A0000 00010100 V_CNDMASK_B32_e64 v0, v1, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020501 V_RSQ_CLAMP_F32_e32 v1, v0 ; 7E025900 V_MUL_F32_e32 v1, v1, v0 ; 10020101 V_MOV_B32_e32 v3, 0x80000000 ; 7E0602FF 80000000 V_XOR_B32_e32 v0, v0, v3 ; 3A000700 V_CMP_GT_F32_e32 vcc, 0, v0 ; 7C080080 V_CNDMASK_B32_e64 v0, 0.000000e+00, v1, vcc, 0, 0, 0, 0 ; D2000000 01AA0280 V_CVT_PKRTZ_F16_F32_e32 v0, v2, v0 ; 5E000102 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v2 ; 5E020502 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v3, s10, v0 ; 4A06000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[15:18], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010F03 V_MOV_B32_e32 v0, 3.000000e+00 ; 7E0002FF 40400000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v15, v0 ; 1000010F V_CVT_I32_F32_e32 v2, v0 ; 7E041100 V_LSHLREV_B32_e32 v0, 4, v2 ; 34000484 V_ADD_I32_e32 v1, 0x44, v0 ; 4A0200FF 00000044 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[7:10], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010703 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v8, v1 ; 10020308 V_ADD_I32_e32 v4, 64, v0 ; 4A0800C0 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v7, v4, v1, 0, 0 ; D2820001 04060907 V_ADD_I32_e32 v4, 0x48, v0 ; 4A0800FF 00000048 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v9, v4, v1, 0, 0 ; D2820001 04060909 V_ADD_I32_e32 v0, 0x4c, v0 ; 4A0000FF 0000004C BUFFER_LOAD_DWORD v0, s[0:3] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v1, v0 ; 06000101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[11:14], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010B03 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v0, v11 ; 10001700 V_ADD_I32_e32 v1, 2, v2 ; 4A020482 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v4, 0x44, v1 ; 4A0802FF 00000044 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v8, v4 ; 10080908 V_ADD_I32_e32 v5, 64, v1 ; 4A0A02C0 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v7, v5, v4, 0, 0 ; D2820004 04120B07 V_ADD_I32_e32 v5, 0x48, v1 ; 4A0A02FF 00000048 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v9, v5, v4, 0, 0 ; D2820004 04120B09 V_ADD_I32_e32 v1, 0x4c, v1 ; 4A0202FF 0000004C BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, v4, v1 ; 06020304 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_ADD_I32_e32 v2, 1, v2 ; 4A040481 V_LSHLREV_B32_e32 v2, 4, v2 ; 34040484 V_ADD_I32_e32 v4, 0x44, v2 ; 4A0804FF 00000044 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v8, v4 ; 10080908 V_ADD_I32_e32 v5, 64, v2 ; 4A0A04C0 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v7, v5, v4, 0, 0 ; D2820004 04120B07 V_ADD_I32_e32 v5, 0x48, v2 ; 4A0A04FF 00000048 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v9, v5, v4, 0, 0 ; D2820004 04120B09 V_ADD_I32_e32 v2, 0x4c, v2 ; 4A0404FF 0000004C BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v2, v4, v2 ; 06040504 V_MUL_F32_e32 v2, v2, v11 ; 10041702 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[3:6], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010303 V_CMP_GT_F32_e64 s[4:5], v12, 0.000000e+00, 0, 0 ; D0080004 0001010C V_MOV_B32_e32 v19, 1.000000e+00 ; 7E2602F2 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_MOV_B32_e32 v20, 3.000000e+00 ; 7E2802FF 40400000 V_MUL_F32_e32 v20, v16, v20 ; 10282910 V_CVT_I32_F32_e32 v20, v20 ; 7E281114 V_LSHLREV_B32_e32 v21, 4, v20 ; 342A2884 V_ADD_I32_e32 v22, 64, v21 ; 4A2C2AC0 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 V_ADD_I32_e32 v23, 0x44, v21 ; 4A2E2AFF 00000044 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v23, v8, v23 ; 102E2F08 V_MAD_F32 v22, v7, v22, v23, 0, 0 ; D2820016 045E2D07 V_ADD_I32_e32 v23, 0x48, v21 ; 4A2E2AFF 00000048 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v22, v9, v23, v22, 0, 0 ; D2820016 045A2F09 V_ADD_I32_e32 v21, 0x4c, v21 ; 4A2A2AFF 0000004C BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v19, v21, v22, 0, 0 ; D2820015 045A2B13 V_MAD_F32 v0, v12, v21, v0, 0, 0 ; D2820000 04022B0C V_ADD_I32_e32 v21, 2, v20 ; 4A2A2882 V_LSHLREV_B32_e32 v21, 4, v21 ; 342A2A84 V_ADD_I32_e32 v22, 64, v21 ; 4A2C2AC0 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 V_ADD_I32_e32 v23, 0x44, v21 ; 4A2E2AFF 00000044 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v23, v8, v23 ; 102E2F08 V_MAD_F32 v22, v7, v22, v23, 0, 0 ; D2820016 045E2D07 V_ADD_I32_e32 v23, 0x48, v21 ; 4A2E2AFF 00000048 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v22, v9, v23, v22, 0, 0 ; D2820016 045A2F09 V_ADD_I32_e32 v21, 0x4c, v21 ; 4A2A2AFF 0000004C BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v19, v21, v22, 0, 0 ; D2820015 045A2B13 V_MAD_F32 v1, v12, v21, v1, 0, 0 ; D2820001 04062B0C V_ADD_I32_e32 v20, 1, v20 ; 4A282881 V_LSHLREV_B32_e32 v20, 4, v20 ; 34282884 V_ADD_I32_e32 v21, 64, v20 ; 4A2A28C0 BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 V_ADD_I32_e32 v22, 0x44, v20 ; 4A2C28FF 00000044 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v22, v8, v22 ; 102C2D08 V_MAD_F32 v21, v7, v21, v22, 0, 0 ; D2820015 045A2B07 V_ADD_I32_e32 v22, 0x48, v20 ; 4A2C28FF 00000048 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v9, v22, v21, 0, 0 ; D2820015 04562D09 V_ADD_I32_e32 v20, 0x4c, v20 ; 4A2828FF 0000004C BUFFER_LOAD_DWORD v20, s[0:3] + v20 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001414 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v20, v19, v20, v21, 0, 0 ; D2820014 04562913 V_MAD_F32 v2, v12, v20, v2, 0, 0 ; D2820002 040A290C V_CMP_GT_F32_e64 s[6:7], v13, 0.000000e+00, 0, 0 ; D0080006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_MOV_B32_e32 v20, 3.000000e+00 ; 7E2802FF 40400000 V_MUL_F32_e32 v15, v17, v20 ; 101E2911 V_CVT_I32_F32_e32 v15, v15 ; 7E1E110F V_LSHLREV_B32_e32 v16, 4, v15 ; 34201E84 V_ADD_I32_e32 v17, 64, v16 ; 4A2220C0 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 V_ADD_I32_e32 v18, 0x44, v16 ; 4A2420FF 00000044 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MUL_F32_e32 v18, v8, v18 ; 10242508 V_MAD_F32 v17, v7, v17, v18, 0, 0 ; D2820011 044A2307 V_ADD_I32_e32 v18, 0x48, v16 ; 4A2420FF 00000048 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v17, v9, v18, v17, 0, 0 ; D2820011 04462509 V_ADD_I32_e32 v16, 0x4c, v16 ; 4A2020FF 0000004C BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v16, v19, v16, v17, 0, 0 ; D2820010 04462113 V_MAD_F32 v0, v13, v16, v0, 0, 0 ; D2820000 0402210D V_ADD_I32_e32 v16, 2, v15 ; 4A201E82 V_LSHLREV_B32_e32 v16, 4, v16 ; 34202084 V_ADD_I32_e32 v17, 64, v16 ; 4A2220C0 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 V_ADD_I32_e32 v18, 0x44, v16 ; 4A2420FF 00000044 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v18, v8, v18 ; 10242508 V_MAD_F32 v17, v7, v17, v18, 0, 0 ; D2820011 044A2307 V_ADD_I32_e32 v18, 0x48, v16 ; 4A2420FF 00000048 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v17, v9, v18, v17, 0, 0 ; D2820011 04462509 V_ADD_I32_e32 v16, 0x4c, v16 ; 4A2020FF 0000004C BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v16, v19, v16, v17, 0, 0 ; D2820010 04462113 V_MAD_F32 v1, v13, v16, v1, 0, 0 ; D2820001 0406210D V_ADD_I32_e32 v15, 1, v15 ; 4A1E1E81 V_LSHLREV_B32_e32 v15, 4, v15 ; 341E1E84 V_ADD_I32_e32 v16, 64, v15 ; 4A201EC0 BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 V_ADD_I32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v17, v8, v17 ; 10222308 V_MAD_F32 v16, v7, v16, v17, 0, 0 ; D2820010 04462107 V_ADD_I32_e32 v17, 0x48, v15 ; 4A221EFF 00000048 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v9, v17, v16, 0, 0 ; D2820007 04422309 V_ADD_I32_e32 v8, 0x4c, v15 ; 4A101EFF 0000004C BUFFER_LOAD_DWORD v8, s[0:3] + v8 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000808 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v19, v8, v7, 0, 0 ; D2820007 041E1113 V_MAD_F32 v2, v13, v7, v2, 0, 0 ; D2820002 040A0F0D S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_B64 exec, exec, s[4:5] ; 88FE047E S_BUFFER_LOAD_DWORD s6, s[0:3], 0xf ; C203010F S_BUFFER_LOAD_DWORD s7, s[0:3], 0xe ; C203810E S_BUFFER_LOAD_DWORD s8, s[0:3], 0xd ; C204010D S_BUFFER_LOAD_DWORD s9, s[0:3], 0xc ; C204810C S_BUFFER_LOAD_DWORD s10, s[0:3], 0xb ; C205010B S_BUFFER_LOAD_DWORD s11, s[0:3], 0xa ; C205810A S_BUFFER_LOAD_DWORD s12, s[0:3], 0x9 ; C2060109 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x8 ; C2068108 S_BUFFER_LOAD_DWORD s14, s[0:3], 0x7 ; C2070107 S_BUFFER_LOAD_DWORD s15, s[0:3], 0x6 ; C2078106 S_BUFFER_LOAD_DWORD s16, s[0:3], 0x5 ; C2080105 S_BUFFER_LOAD_DWORD s17, s[0:3], 0x4 ; C2088104 S_BUFFER_LOAD_DWORD s18, s[0:3], 0x3 ; C2090103 S_BUFFER_LOAD_DWORD s19, s[0:3], 0x2 ; C2098102 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x1 ; C20A0101 S_BUFFER_LOAD_DWORD s21, s[0:3], 0x0 ; C20A8100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s6 ; 7E0E0206 V_MOV_B32_e32 v8, s7 ; 7E100207 V_MOV_B32_e32 v9, s8 ; 7E120208 V_MOV_B32_e32 v10, s9 ; 7E140209 V_MOV_B32_e32 v11, s10 ; 7E16020A V_MOV_B32_e32 v12, s11 ; 7E18020B V_MOV_B32_e32 v13, s12 ; 7E1A020C V_MOV_B32_e32 v14, s13 ; 7E1C020D V_MOV_B32_e32 v15, s14 ; 7E1E020E V_MOV_B32_e32 v16, s15 ; 7E20020F V_MOV_B32_e32 v17, s16 ; 7E220210 V_MOV_B32_e32 v18, s17 ; 7E240211 V_MOV_B32_e32 v19, s18 ; 7E260212 V_MOV_B32_e32 v20, s19 ; 7E280213 V_MOV_B32_e32 v21, s20 ; 7E2A0214 V_MOV_B32_e32 v22, s21 ; 7E2C0215 V_MOV_B32_e32 v23, 1.000000e+00 ; 7E2E02F2 EXP 15, 32, 0, 0, 0, v3, v4, v1, v23 ; F800020F 17010403 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v3, v2, v15 ; 10061F02 V_MAD_F32 v3, v0, v19, v3, 0, 0 ; D2820003 040E2700 V_MAD_F32 v3, v1, v11, v3, 0, 0 ; D2820003 040E1701 V_ADD_F32_e32 v3, v3, v7 ; 06060F03 V_MUL_F32_e32 v4, v2, v16 ; 10082102 V_MAD_F32 v4, v0, v20, v4, 0, 0 ; D2820004 04122900 V_MAD_F32 v4, v1, v12, v4, 0, 0 ; D2820004 04121901 V_ADD_F32_e32 v4, v4, v8 ; 06081104 V_MUL_F32_e32 v5, v2, v17 ; 100A2302 V_MAD_F32 v5, v0, v21, v5, 0, 0 ; D2820005 04162B00 V_MAD_F32 v5, v1, v13, v5, 0, 0 ; D2820005 04161B01 V_ADD_F32_e32 v5, v5, v9 ; 060A1305 V_MUL_F32_e32 v2, v2, v18 ; 10042502 V_MAD_F32 v0, v0, v22, v2, 0, 0 ; D2820000 040A2D00 V_MAD_F32 v0, v1, v14, v0, 0, 0 ; D2820000 04021D01 V_ADD_F32_e32 v0, v0, v10 ; 06001500 EXP 15, 12, 0, 1, 0, v0, v5, v4, v3 ; F80008CF 03040500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.3300, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 2: UIF TEMP[0].xxxx :0 3: MOV TEMP[0].x, IMM[0].yyyy 4: ELSE :0 5: MOV TEMP[0].x, IMM[0].zzzz 6: ENDIF 7: ADD TEMP[1].y, -CONST[1].xxxx, CONST[1].yyyy 8: MAD TEMP[1].y, CONST[1].zzzz, TEMP[1].yyyy, CONST[1].xxxx 9: MUL TEMP[0].w, TEMP[1].yyyy, TEMP[0].xxxx 10: MOV TEMP[0].w, TEMP[0].wwww 11: MOV TEMP[0].xyz, IMM[0].yyyy 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = fsub float -0.000000e+00, %24 %30 = fadd float %29, %28 %31 = fcmp oge float %30, 0.000000e+00 %32 = sext i1 %31 to i32 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = icmp ne i32 %34, 0 %. = select i1 %35, float 1.000000e+00, float 0x3FD51EB860000000 %36 = fsub float -0.000000e+00, %25 %37 = fadd float %36, %26 %38 = fmul float %27, %37 %39 = fadd float %38, %25 %40 = fmul float %39, %. %41 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 2, 0, [m0] ; C8080200 V_INTERP_P2_F32 v2, [v2], v1, 2, 0, [m0] ; C8090201 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s4, v2 ; 0A000404 V_CMP_GE_F32_e64 s[4:5], v0, 0.000000e+00, 0, 0 ; D00C0004 00010100 V_MOV_B32_e32 v0, 0x3ea8f5c3 ; 7E0002FF 3EA8F5C3 V_CNDMASK_B32_e64 v0, v0, 1.000000e+00, s[4:5], 0, 0, 0, 0 ; D2000000 0011E500 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x4 ; C2028104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_SUB_F32_e32 v2, s4, v1 ; 08040204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x6 ; C2000106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s0, v2, v1, 0, 0 ; D2820001 04060400 V_MUL_F32_e32 v0, v1, v0 ; 10000101 V_CVT_PKRTZ_F16_F32_e32 v0, 1.000000e+00, v0 ; 5E0000F2 V_CVT_PKRTZ_F16_F32_e64 v1, 1.000000e+00, 1.000000e+00, 0, 0 ; D25E0001 0001E4F2 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MOV TEMP[0].z, TEMP[0].zzzz 9: MOV TEMP[0].xy, IN[1].xyxx 10: MOV TEMP[0].w, IMM[0].xxxx 11: MOV OUT[1], TEMP[0] 12: MOV OUT[0], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = fmul float %33, %50 %59 = fmul float %34, %50 %60 = fmul float %35, %50 %61 = fmul float %36, %50 %62 = fmul float %49, %29 %63 = fadd float %62, %58 %64 = fmul float %49, %30 %65 = fadd float %64, %59 %66 = fmul float %49, %31 %67 = fadd float %66, %60 %68 = fmul float %49, %32 %69 = fadd float %68, %61 %70 = fmul float %51, %37 %71 = fadd float %70, %63 %72 = fmul float %51, %38 %73 = fadd float %72, %65 %74 = fmul float %51, %39 %75 = fadd float %74, %67 %76 = fmul float %51, %40 %77 = fadd float %76, %69 %78 = fadd float %71, %41 %79 = fadd float %73, %42 %80 = fadd float %75, %43 %81 = fadd float %77, %44 %82 = fmul float %79, %17 %83 = fmul float %79, %18 %84 = fmul float %79, %19 %85 = fmul float %79, %20 %86 = fmul float %78, %13 %87 = fadd float %86, %82 %88 = fmul float %78, %14 %89 = fadd float %88, %83 %90 = fmul float %78, %15 %91 = fadd float %90, %84 %92 = fmul float %78, %16 %93 = fadd float %92, %85 %94 = fmul float %80, %21 %95 = fadd float %94, %87 %96 = fmul float %80, %22 %97 = fadd float %96, %89 %98 = fmul float %80, %23 %99 = fadd float %98, %91 %100 = fmul float %80, %24 %101 = fadd float %100, %93 %102 = fmul float %81, %25 %103 = fadd float %102, %95 %104 = fmul float %81, %26 %105 = fadd float %104, %97 %106 = fmul float %81, %27 %107 = fadd float %106, %99 %108 = fmul float %81, %28 %109 = fadd float %108, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float %80, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %105, float %107, float %109) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v2 ; 100A0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v1, v6, v5, 0, 0 ; D2820005 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v3, v6, v5, 0, 0 ; D2820005 04160D03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 V_MOV_B32_e32 v0, 1.000000e+00 ; 7E0002F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v6, v7, v5, v0 ; F800020F 00050706 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v2 ; 10000404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v0, 0, 0 ; D2820000 04020D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v3, v6, v0, 0, 0 ; D2820000 04020D03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v1, v7, v6, 0, 0 ; D2820006 041A0F01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v3, v7, v6, 0, 0 ; D2820006 041A0F03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v6 ; 060C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v6 ; 100E0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v0, s4, v7, 0, 0 ; D2820007 041C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v5, s4, v7, 0, 0 ; D2820007 041C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s4, v2 ; 10100404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v8, v1, v9, v8, 0, 0 ; D2820008 04221301 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v1, v3, v9, v8, 0, 0 ; D2820001 04221303 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v1, s4, v7, 0, 0 ; D2820002 041C0901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v6 ; 10060C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v5, s4, v3, 0, 0 ; D2820003 040C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v1, s4, v3, 0, 0 ; D2820003 040C0901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v6 ; 10080C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s4, v4, 0, 0 ; D2820004 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v5, s4, v4, 0, 0 ; D2820004 04100905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v1, s4, v4, 0, 0 ; D2820004 04100901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v6 ; 100C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v6, 0, 0 ; D2820000 04180900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v5, s4, v0, 0, 0 ; D2820000 04000905 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v1, s0, v0, 0, 0 ; D2820000 04000101 EXP 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800800 00640202 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 1.000000e+00, v2 ; 0E0404F2 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MUL_F32_e32 v3, 5.000000e-01, v2 ; 100604F0 V_INTERP_P1_F32 v4, v0, 2, 0, [m0] ; C8100200 V_INTERP_P2_F32 v4, [v4], v1, 2, 0, [m0] ; C8110201 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v4 ; 0A000800 V_CMP_GE_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D00C0000 00010100 V_CNDMASK_B32_e64 v0, v3, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020503 V_SUB_F32_e32 v1, 1.000000e-01, v2 ; 080204FF 3DCCCCCD V_CMP_GE_F32_e64 s[0:1], v1, 0.000000e+00, 0, 0 ; D00C0000 00010101 V_CNDMASK_B32_e64 v0, v0, 0, s[0:1], 0, 0, 0, 0 ; D2000000 00010100 V_CVT_PKRTZ_F16_F32_e32 v0, 1.000000e+00, v0 ; 5E0000F2 V_CVT_PKRTZ_F16_F32_e64 v1, 1.000000e+00, 1.000000e+00, 0, 0 ; D25E0001 0001E4F2 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v3, s10, v0 ; 4A06000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[15:18], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010F03 V_MOV_B32_e32 v0, 3.000000e+00 ; 7E0002FF 40400000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v15, v0 ; 1000010F V_CVT_I32_F32_e32 v2, v0 ; 7E041100 V_LSHLREV_B32_e32 v0, 4, v2 ; 34000484 V_ADD_I32_e32 v1, 0x44, v0 ; 4A0200FF 00000044 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[7:10], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010703 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v8, v1 ; 10020308 V_ADD_I32_e32 v4, 64, v0 ; 4A0800C0 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v7, v4, v1, 0, 0 ; D2820001 04060907 V_ADD_I32_e32 v4, 0x48, v0 ; 4A0800FF 00000048 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v9, v4, v1, 0, 0 ; D2820001 04060909 V_ADD_I32_e32 v0, 0x4c, v0 ; 4A0000FF 0000004C BUFFER_LOAD_DWORD v0, s[0:3] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v1, v0 ; 06000101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[11:14], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010B03 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v0, v11 ; 10001700 V_ADD_I32_e32 v1, 2, v2 ; 4A020482 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v4, 0x44, v1 ; 4A0802FF 00000044 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v8, v4 ; 10080908 V_ADD_I32_e32 v5, 64, v1 ; 4A0A02C0 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v7, v5, v4, 0, 0 ; D2820004 04120B07 V_ADD_I32_e32 v5, 0x48, v1 ; 4A0A02FF 00000048 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v9, v5, v4, 0, 0 ; D2820004 04120B09 V_ADD_I32_e32 v1, 0x4c, v1 ; 4A0202FF 0000004C BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, v4, v1 ; 06020304 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_ADD_I32_e32 v2, 1, v2 ; 4A040481 V_LSHLREV_B32_e32 v2, 4, v2 ; 34040484 V_ADD_I32_e32 v4, 0x44, v2 ; 4A0804FF 00000044 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v8, v4 ; 10080908 V_ADD_I32_e32 v5, 64, v2 ; 4A0A04C0 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v7, v5, v4, 0, 0 ; D2820004 04120B07 V_ADD_I32_e32 v5, 0x48, v2 ; 4A0A04FF 00000048 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v9, v5, v4, 0, 0 ; D2820004 04120B09 V_ADD_I32_e32 v2, 0x4c, v2 ; 4A0404FF 0000004C BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v2, v4, v2 ; 06040504 V_MUL_F32_e32 v2, v2, v11 ; 10041702 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[3:6], s[4:7][v3] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010303 V_CMP_GT_F32_e64 s[4:5], v12, 0.000000e+00, 0, 0 ; D0080004 0001010C V_MOV_B32_e32 v19, 1.000000e+00 ; 7E2602F2 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_MOV_B32_e32 v20, 3.000000e+00 ; 7E2802FF 40400000 V_MUL_F32_e32 v20, v16, v20 ; 10282910 V_CVT_I32_F32_e32 v20, v20 ; 7E281114 V_LSHLREV_B32_e32 v21, 4, v20 ; 342A2884 V_ADD_I32_e32 v22, 64, v21 ; 4A2C2AC0 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 V_ADD_I32_e32 v23, 0x44, v21 ; 4A2E2AFF 00000044 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v23, v8, v23 ; 102E2F08 V_MAD_F32 v22, v7, v22, v23, 0, 0 ; D2820016 045E2D07 V_ADD_I32_e32 v23, 0x48, v21 ; 4A2E2AFF 00000048 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v22, v9, v23, v22, 0, 0 ; D2820016 045A2F09 V_ADD_I32_e32 v21, 0x4c, v21 ; 4A2A2AFF 0000004C BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v19, v21, v22, 0, 0 ; D2820015 045A2B13 V_MAD_F32 v0, v12, v21, v0, 0, 0 ; D2820000 04022B0C V_ADD_I32_e32 v21, 2, v20 ; 4A2A2882 V_LSHLREV_B32_e32 v21, 4, v21 ; 342A2A84 V_ADD_I32_e32 v22, 64, v21 ; 4A2C2AC0 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 V_ADD_I32_e32 v23, 0x44, v21 ; 4A2E2AFF 00000044 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v23, v8, v23 ; 102E2F08 V_MAD_F32 v22, v7, v22, v23, 0, 0 ; D2820016 045E2D07 V_ADD_I32_e32 v23, 0x48, v21 ; 4A2E2AFF 00000048 BUFFER_LOAD_DWORD v23, s[0:3] + v23 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v22, v9, v23, v22, 0, 0 ; D2820016 045A2F09 V_ADD_I32_e32 v21, 0x4c, v21 ; 4A2A2AFF 0000004C BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v19, v21, v22, 0, 0 ; D2820015 045A2B13 V_MAD_F32 v1, v12, v21, v1, 0, 0 ; D2820001 04062B0C V_ADD_I32_e32 v20, 1, v20 ; 4A282881 V_LSHLREV_B32_e32 v20, 4, v20 ; 34282884 V_ADD_I32_e32 v21, 64, v20 ; 4A2A28C0 BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 V_ADD_I32_e32 v22, 0x44, v20 ; 4A2C28FF 00000044 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v22, v8, v22 ; 102C2D08 V_MAD_F32 v21, v7, v21, v22, 0, 0 ; D2820015 045A2B07 V_ADD_I32_e32 v22, 0x48, v20 ; 4A2C28FF 00000048 BUFFER_LOAD_DWORD v22, s[0:3] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001616 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v21, v9, v22, v21, 0, 0 ; D2820015 04562D09 V_ADD_I32_e32 v20, 0x4c, v20 ; 4A2828FF 0000004C BUFFER_LOAD_DWORD v20, s[0:3] + v20 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001414 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v20, v19, v20, v21, 0, 0 ; D2820014 04562913 V_MAD_F32 v2, v12, v20, v2, 0, 0 ; D2820002 040A290C V_CMP_GT_F32_e64 s[6:7], v13, 0.000000e+00, 0, 0 ; D0080006 0001010D S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_MOV_B32_e32 v20, 3.000000e+00 ; 7E2802FF 40400000 V_MUL_F32_e32 v15, v17, v20 ; 101E2911 V_CVT_I32_F32_e32 v15, v15 ; 7E1E110F V_LSHLREV_B32_e32 v16, 4, v15 ; 34201E84 V_ADD_I32_e32 v17, 64, v16 ; 4A2220C0 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 V_ADD_I32_e32 v18, 0x44, v16 ; 4A2420FF 00000044 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MUL_F32_e32 v18, v8, v18 ; 10242508 V_MAD_F32 v17, v7, v17, v18, 0, 0 ; D2820011 044A2307 V_ADD_I32_e32 v18, 0x48, v16 ; 4A2420FF 00000048 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v17, v9, v18, v17, 0, 0 ; D2820011 04462509 V_ADD_I32_e32 v16, 0x4c, v16 ; 4A2020FF 0000004C BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v16, v19, v16, v17, 0, 0 ; D2820010 04462113 V_MAD_F32 v0, v13, v16, v0, 0, 0 ; D2820000 0402210D V_ADD_I32_e32 v16, 2, v15 ; 4A201E82 V_LSHLREV_B32_e32 v16, 4, v16 ; 34202084 V_ADD_I32_e32 v17, 64, v16 ; 4A2220C0 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 V_ADD_I32_e32 v18, 0x44, v16 ; 4A2420FF 00000044 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v18, v8, v18 ; 10242508 V_MAD_F32 v17, v7, v17, v18, 0, 0 ; D2820011 044A2307 V_ADD_I32_e32 v18, 0x48, v16 ; 4A2420FF 00000048 BUFFER_LOAD_DWORD v18, s[0:3] + v18 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001212 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v17, v9, v18, v17, 0, 0 ; D2820011 04462509 V_ADD_I32_e32 v16, 0x4c, v16 ; 4A2020FF 0000004C BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v16, v19, v16, v17, 0, 0 ; D2820010 04462113 V_MAD_F32 v1, v13, v16, v1, 0, 0 ; D2820001 0406210D V_ADD_I32_e32 v15, 1, v15 ; 4A1E1E81 V_LSHLREV_B32_e32 v15, 4, v15 ; 341E1E84 V_ADD_I32_e32 v16, 64, v15 ; 4A201EC0 BUFFER_LOAD_DWORD v16, s[0:3] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001010 V_ADD_I32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v17, v8, v17 ; 10222308 V_MAD_F32 v16, v7, v16, v17, 0, 0 ; D2820010 04462107 V_ADD_I32_e32 v17, 0x48, v15 ; 4A221EFF 00000048 BUFFER_LOAD_DWORD v17, s[0:3] + v17 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001111 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v9, v17, v16, 0, 0 ; D2820007 04422309 V_ADD_I32_e32 v8, 0x4c, v15 ; 4A101EFF 0000004C BUFFER_LOAD_DWORD v8, s[0:3] + v8 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000808 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v19, v8, v7, 0, 0 ; D2820007 041E1113 V_MAD_F32 v2, v13, v7, v2, 0, 0 ; D2820002 040A0F0D S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_B64 exec, exec, s[4:5] ; 88FE047E S_BUFFER_LOAD_DWORD s6, s[0:3], 0xf ; C203010F S_BUFFER_LOAD_DWORD s7, s[0:3], 0xe ; C203810E S_BUFFER_LOAD_DWORD s8, s[0:3], 0xd ; C204010D S_BUFFER_LOAD_DWORD s9, s[0:3], 0xc ; C204810C S_BUFFER_LOAD_DWORD s10, s[0:3], 0xb ; C205010B S_BUFFER_LOAD_DWORD s11, s[0:3], 0xa ; C205810A S_BUFFER_LOAD_DWORD s12, s[0:3], 0x9 ; C2060109 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x8 ; C2068108 S_BUFFER_LOAD_DWORD s14, s[0:3], 0x7 ; C2070107 S_BUFFER_LOAD_DWORD s15, s[0:3], 0x6 ; C2078106 S_BUFFER_LOAD_DWORD s16, s[0:3], 0x5 ; C2080105 S_BUFFER_LOAD_DWORD s17, s[0:3], 0x4 ; C2088104 S_BUFFER_LOAD_DWORD s18, s[0:3], 0x3 ; C2090103 S_BUFFER_LOAD_DWORD s19, s[0:3], 0x2 ; C2098102 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x1 ; C20A0101 S_BUFFER_LOAD_DWORD s21, s[0:3], 0x0 ; C20A8100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s6 ; 7E0E0206 V_MOV_B32_e32 v8, s7 ; 7E100207 V_MOV_B32_e32 v9, s8 ; 7E120208 V_MOV_B32_e32 v10, s9 ; 7E140209 V_MOV_B32_e32 v11, s10 ; 7E16020A V_MOV_B32_e32 v12, s11 ; 7E18020B V_MOV_B32_e32 v13, s12 ; 7E1A020C V_MOV_B32_e32 v14, s13 ; 7E1C020D V_MOV_B32_e32 v15, s14 ; 7E1E020E V_MOV_B32_e32 v16, s15 ; 7E20020F V_MOV_B32_e32 v17, s16 ; 7E220210 V_MOV_B32_e32 v18, s17 ; 7E240211 V_MOV_B32_e32 v19, s18 ; 7E260212 V_MOV_B32_e32 v20, s19 ; 7E280213 V_MOV_B32_e32 v21, s20 ; 7E2A0214 V_MOV_B32_e32 v22, s21 ; 7E2C0215 V_MOV_B32_e32 v23, 1.000000e+00 ; 7E2E02F2 EXP 15, 32, 0, 0, 0, v3, v4, v1, v23 ; F800020F 17010403 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v3, v2, v15 ; 10061F02 V_MAD_F32 v3, v0, v19, v3, 0, 0 ; D2820003 040E2700 V_MAD_F32 v3, v1, v11, v3, 0, 0 ; D2820003 040E1701 V_ADD_F32_e32 v3, v3, v7 ; 06060F03 V_MUL_F32_e32 v4, v2, v16 ; 10082102 V_MAD_F32 v4, v0, v20, v4, 0, 0 ; D2820004 04122900 V_MAD_F32 v4, v1, v12, v4, 0, 0 ; D2820004 04121901 V_ADD_F32_e32 v4, v4, v8 ; 06081104 V_MUL_F32_e32 v5, v2, v17 ; 100A2302 V_MAD_F32 v5, v0, v21, v5, 0, 0 ; D2820005 04162B00 V_MAD_F32 v5, v1, v13, v5, 0, 0 ; D2820005 04161B01 V_ADD_F32_e32 v5, v5, v9 ; 060A1305 V_MUL_F32_e32 v2, v2, v18 ; 10042502 V_MAD_F32 v0, v0, v22, v2, 0, 0 ; D2820000 040A2D00 V_MAD_F32 v0, v1, v14, v0, 0, 0 ; D2820000 04021D01 V_ADD_F32_e32 v0, v0, v10 ; 06001500 EXP 15, 12, 0, 1, 0, v0, v5, v4, v3 ; F80008CF 03040500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800800 00640202 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 1.000000e+00, v2 ; 0E0404F2 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MUL_F32_e32 v3, 5.000000e-01, v2 ; 100604F0 V_INTERP_P1_F32 v4, v0, 2, 0, [m0] ; C8100200 V_INTERP_P2_F32 v4, [v4], v1, 2, 0, [m0] ; C8110201 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[0:3], 0x0 ; C2000100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v4 ; 0A000800 V_CMP_GE_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D00C0000 00010100 V_CNDMASK_B32_e64 v0, v3, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020503 V_SUB_F32_e32 v1, 1.000000e-01, v2 ; 080204FF 3DCCCCCD V_CMP_GE_F32_e64 s[0:1], v1, 0.000000e+00, 0, 0 ; D00C0000 00010101 V_CNDMASK_B32_e64 v0, v0, 0, s[0:1], 0, 0, 0, 0 ; D2000000 00010100 V_CVT_PKRTZ_F16_F32_e32 v0, 1.000000e+00, v0 ; 5E0000F2 V_CVT_PKRTZ_F16_F32_e64 v1, 1.000000e+00, 1.000000e+00, 0, 0 ; D25E0001 0001E4F2 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v5 ; F80008CF 05020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.5000} IMM[1] FLT32 { 0.1250, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].zw, TEMP[2], SAMP[0], 2D 5: MOV TEMP[0].zw, TEMP[2].wwzw 6: ADD TEMP[3].xy, CONST[0].xwzw, IN[0] 7: MOV TEMP[0].xy, TEMP[3].xyxx 8: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 9: MOV TEMP[3].xy, TEMP[1].xyyy 10: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 11: ADD TEMP[2].x, TEMP[2].wwww, TEMP[3].wwww 12: ADD TEMP[3].yz, CONST[0].xzyw, IN[0].xxyw 13: MOV TEMP[0].yz, TEMP[3].zyzz 14: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 15: MOV TEMP[3].xy, TEMP[1].xyyy 16: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 17: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 18: ADD TEMP[3].yz, CONST[0].xzww, IN[0].xxyw 19: MOV TEMP[0].yz, TEMP[3].zyzz 20: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 21: MOV TEMP[3].xy, TEMP[1].xyyy 22: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 23: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 24: MAD TEMP[3].yz, CONST[0].xxyw, IMM[0].wwww, IN[0].xxyw 25: MOV TEMP[0].yz, TEMP[3].zyzz 26: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 27: MOV TEMP[3].xy, TEMP[1].xyyy 28: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 29: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 30: MAD TEMP[3].yz, CONST[0].xxww, IMM[0].wwww, IN[0].xxyw 31: MOV TEMP[0].yz, TEMP[3].zyzz 32: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 33: MOV TEMP[3].xy, TEMP[1].xyyy 34: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 35: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 36: MAD TEMP[3].yz, CONST[0].xzyw, IMM[0].wwww, IN[0].xxyw 37: MOV TEMP[0].yz, TEMP[3].zyzz 38: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 39: MOV TEMP[3].xy, TEMP[1].xyyy 40: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 41: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 42: MAD TEMP[3].yz, CONST[0].xzww, IMM[0].wwww, IN[0].xxyw 43: MOV TEMP[0].yz, TEMP[3].zyzz 44: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 45: MOV TEMP[0].xy, TEMP[1].xyyy 46: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 47: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[0].wwww 48: MUL TEMP[0], TEMP[0].xxxx, IMM[1].xxxx 49: MOV OUT[0], TEMP[0] 50: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fadd float %24, %32 %35 = fadd float %25, %33 %36 = fmul float %34, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, -1.000000e+00 %39 = fadd float %38, 1.000000e+00 %40 = bitcast float %37 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = bitcast <8 x i32> %29 to <32 x i8> %45 = bitcast <4 x i32> %31 to <16 x i8> %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %44, <16 x i8> %45, i32 2) %47 = extractelement <4 x float> %46, i32 3 %48 = fadd float %24, %32 %49 = fadd float %27, %33 %50 = fmul float %48, 1.000000e+00 %51 = fadd float %50, 0.000000e+00 %52 = fmul float %49, -1.000000e+00 %53 = fadd float %52, 1.000000e+00 %54 = bitcast float %51 to i32 %55 = bitcast float %53 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = bitcast <8 x i32> %29 to <32 x i8> %59 = bitcast <4 x i32> %31 to <16 x i8> %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %58, <16 x i8> %59, i32 2) %61 = extractelement <4 x float> %60, i32 3 %62 = fadd float %47, %61 %63 = fadd float %26, %32 %64 = fadd float %25, %33 %65 = fmul float %63, 1.000000e+00 %66 = fadd float %65, 0.000000e+00 %67 = fmul float %64, -1.000000e+00 %68 = fadd float %67, 1.000000e+00 %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = bitcast <8 x i32> %29 to <32 x i8> %74 = bitcast <4 x i32> %31 to <16 x i8> %75 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %73, <16 x i8> %74, i32 2) %76 = extractelement <4 x float> %75, i32 3 %77 = fadd float %62, %76 %78 = fadd float %26, %32 %79 = fadd float %27, %33 %80 = fmul float %78, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %79, -1.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = bitcast float %81 to i32 %85 = bitcast float %83 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = bitcast <8 x i32> %29 to <32 x i8> %89 = bitcast <4 x i32> %31 to <16 x i8> %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %88, <16 x i8> %89, i32 2) %91 = extractelement <4 x float> %90, i32 3 %92 = fadd float %77, %91 %93 = fmul float %24, 5.000000e-01 %94 = fadd float %93, %32 %95 = fmul float %25, 5.000000e-01 %96 = fadd float %95, %33 %97 = fmul float %94, 1.000000e+00 %98 = fadd float %97, 0.000000e+00 %99 = fmul float %96, -1.000000e+00 %100 = fadd float %99, 1.000000e+00 %101 = bitcast float %98 to i32 %102 = bitcast float %100 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = bitcast <8 x i32> %29 to <32 x i8> %106 = bitcast <4 x i32> %31 to <16 x i8> %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %105, <16 x i8> %106, i32 2) %108 = extractelement <4 x float> %107, i32 3 %109 = fadd float %92, %108 %110 = fmul float %24, 5.000000e-01 %111 = fadd float %110, %32 %112 = fmul float %27, 5.000000e-01 %113 = fadd float %112, %33 %114 = fmul float %111, 1.000000e+00 %115 = fadd float %114, 0.000000e+00 %116 = fmul float %113, -1.000000e+00 %117 = fadd float %116, 1.000000e+00 %118 = bitcast float %115 to i32 %119 = bitcast float %117 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = bitcast <8 x i32> %29 to <32 x i8> %123 = bitcast <4 x i32> %31 to <16 x i8> %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %122, <16 x i8> %123, i32 2) %125 = extractelement <4 x float> %124, i32 3 %126 = fadd float %109, %125 %127 = fmul float %26, 5.000000e-01 %128 = fadd float %127, %32 %129 = fmul float %25, 5.000000e-01 %130 = fadd float %129, %33 %131 = fmul float %128, 1.000000e+00 %132 = fadd float %131, 0.000000e+00 %133 = fmul float %130, -1.000000e+00 %134 = fadd float %133, 1.000000e+00 %135 = bitcast float %132 to i32 %136 = bitcast float %134 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = bitcast <8 x i32> %29 to <32 x i8> %140 = bitcast <4 x i32> %31 to <16 x i8> %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %138, <32 x i8> %139, <16 x i8> %140, i32 2) %142 = extractelement <4 x float> %141, i32 3 %143 = fadd float %126, %142 %144 = fmul float %26, 5.000000e-01 %145 = fadd float %144, %32 %146 = fmul float %27, 5.000000e-01 %147 = fadd float %146, %33 %148 = fmul float %145, 1.000000e+00 %149 = fadd float %148, 0.000000e+00 %150 = fmul float %147, -1.000000e+00 %151 = fadd float %150, 1.000000e+00 %152 = bitcast float %149 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = bitcast <8 x i32> %29 to <32 x i8> %157 = bitcast <4 x i32> %31 to <16 x i8> %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %156, <16 x i8> %157, i32 2) %159 = extractelement <4 x float> %158, i32 3 %160 = fadd float %143, %159 %161 = fmul float %160, 1.250000e-01 %162 = fmul float %160, 1.250000e-01 %163 = fmul float %160, 1.250000e-01 %164 = fmul float %160, 1.250000e-01 %165 = call i32 @llvm.SI.packf16(float %161, float %162) %166 = bitcast i32 %165 to float %167 = call i32 @llvm.SI.packf16(float %163, float %164) %168 = bitcast i32 %167 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %166, float %168, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[12:15], s[2:3], 0x0 ; C0860300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s16, s[12:15], 0x0 ; C2080D00 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s16, v2 ; 06060410 V_INTERP_P1_F32 v5, v0, 1, 0, [m0] ; C8140100 V_INTERP_P2_F32 v5, [v5], v1, 1, 0, [m0] ; C8150101 S_BUFFER_LOAD_DWORD s17, s[12:15], 0x3 ; C2088D03 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s17, v5 ; 06000A11 V_SUB_F32_e32 v4, 1.000000e+00, v0 ; 080800F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v0, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010003 S_BUFFER_LOAD_DWORD s18, s[12:15], 0x1 ; C2090D01 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_ADD_F32_e32 v1, s18, v5 ; 06020A12 V_SUB_F32_e32 v7, 1.000000e+00, v1 ; 080E02F2 V_MOV_B32_e32 v8, v3 ; 7E100303 V_MOV_B32_e32 v9, v4 ; 7E120304 V_MOV_B32_e32 v9, v7 ; 7E120307 IMAGE_SAMPLE v1, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800800 00010108 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v1, v0 ; 06000101 S_BUFFER_LOAD_DWORD s12, s[12:15], 0x2 ; C2060D02 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s12, v2 ; 060C040C IMAGE_SAMPLE v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800800 00010106 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_MOV_B32_e32 v7, v4 ; 7E0E0304 IMAGE_SAMPLE v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800800 00010106 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_MAD_F32 v3, s16, 5.000000e-01, v2, 0, 0 ; D2820003 0409E010 V_MAD_F32 v1, s18, 5.000000e-01, v5, 0, 0 ; D2820001 0415E012 V_SUB_F32_e32 v4, 1.000000e+00, v1 ; 080802F2 IMAGE_SAMPLE v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010103 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_MAD_F32 v1, s17, 5.000000e-01, v5, 0, 0 ; D2820001 0415E011 V_SUB_F32_e32 v1, 1.000000e+00, v1 ; 080202F2 V_MOV_B32_e32 v5, v3 ; 7E0A0303 V_MOV_B32_e32 v6, v4 ; 7E0C0304 V_MOV_B32_e32 v6, v1 ; 7E0C0301 IMAGE_SAMPLE v5, 8, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[4:11], s[0:3] ; F0800800 00010505 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v5 ; 06000B00 V_MAD_F32 v3, s12, 5.000000e-01, v2, 0, 0 ; D2820003 0409E00C IMAGE_SAMPLE v2, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010203 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v2 ; 06000500 V_MOV_B32_e32 v4, v1 ; 7E080301 IMAGE_SAMPLE v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010103 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v1 ; 06000300 V_MUL_F32_e32 v0, 1.250000e-01, v0 ; 100000FF 3E000000 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v0 ; 5E000100 EXP 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: ADD TEMP[0], TEMP[0], CONST[7] 3: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 4: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 5: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 6: MAD TEMP[0], TEMP[0].wwww, CONST[3], TEMP[1] 7: MOV OUT[1], IN[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = fmul float %33, %46 %56 = fmul float %34, %46 %57 = fmul float %35, %46 %58 = fmul float %36, %46 %59 = fmul float %45, %29 %60 = fadd float %59, %55 %61 = fmul float %45, %30 %62 = fadd float %61, %56 %63 = fmul float %45, %31 %64 = fadd float %63, %57 %65 = fmul float %45, %32 %66 = fadd float %65, %58 %67 = fadd float %60, %37 %68 = fadd float %62, %38 %69 = fadd float %64, %39 %70 = fadd float %66, %40 %71 = fmul float %68, %17 %72 = fmul float %68, %18 %73 = fmul float %68, %19 %74 = fmul float %68, %20 %75 = fmul float %67, %13 %76 = fadd float %75, %71 %77 = fmul float %67, %14 %78 = fadd float %77, %72 %79 = fmul float %67, %15 %80 = fadd float %79, %73 %81 = fmul float %67, %16 %82 = fadd float %81, %74 %83 = fmul float %69, %21 %84 = fadd float %83, %76 %85 = fmul float %69, %22 %86 = fadd float %85, %78 %87 = fmul float %69, %23 %88 = fadd float %87, %80 %89 = fmul float %69, %24 %90 = fadd float %89, %82 %91 = fmul float %70, %25 %92 = fadd float %91, %84 %93 = fmul float %70, %26 %94 = fadd float %93, %86 %95 = fmul float %70, %27 %96 = fadd float %95, %88 %97 = fmul float %70, %28 %98 = fadd float %97, %90 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %51, float %52, float %53, float %54) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %92, float %94, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v1 ; 10080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v1 ; 100A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v0, v6, v5, 0, 0 ; D2820005 04160D00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v5 ; 100C0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v4, s4, v6, 0, 0 ; D2820006 04180904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v0, v8, v7, 0, 0 ; D2820007 041E1100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v7 ; 060E0E04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v7, s4, v6, 0, 0 ; D2820006 04180907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s4, v1 ; 10100204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v0, v0, v9, v8, 0, 0 ; D2820000 04221300 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v6, 0, 0 ; D2820001 04180900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v5 ; 10040A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v4, s4, v2, 0, 0 ; D2820002 04080904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v7, s4, v2, 0, 0 ; D2820002 04080907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v5 ; 10060A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v7, s4, v3, 0, 0 ; D2820003 040C0907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v4, s4, v5, 0, 0 ; D2820004 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v7, s4, v4, 0, 0 ; D2820004 04100907 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s0, v4, 0, 0 ; D2820000 04100100 EXP 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0] 1: MAD TEMP[1], TEMP[0].wwww, IMM[0].xxxy, IMM[0].yyyx 2: MUL TEMP[0], TEMP[0], TEMP[1] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %24, %28 %33 = fmul float %25, %29 %34 = fmul float %26, %30 %35 = fmul float %27, %31 %36 = fmul float %35, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %35, 1.000000e+00 %41 = fadd float %40, 0.000000e+00 %42 = fmul float %35, 0.000000e+00 %43 = fadd float %42, 1.000000e+00 %44 = fmul float %32, %37 %45 = fmul float %33, %39 %46 = fmul float %34, %41 %47 = fmul float %35, %43 %48 = call i32 @llvm.SI.packf16(float %44, float %45) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v2 ; 10040404 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v3 ; 10060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 V_INTERP_P1_F32 v4, v0, 0, 0, [m0] ; C8100000 V_INTERP_P2_F32 v4, [v4], v1, 0, 0, [m0] ; C8110001 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v4 ; 10080804 V_MUL_F32_e32 v4, v4, v2 ; 10080504 V_CVT_PKRTZ_F16_F32_e32 v3, v4, v3 ; 5E060704 V_INTERP_P1_F32 v4, v0, 2, 0, [m0] ; C8100200 V_INTERP_P2_F32 v4, [v4], v1, 2, 0, [m0] ; C8110201 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x2 ; C2000102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s0, v4 ; 10000800 V_MUL_F32_e32 v0, v0, v2 ; 10000500 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..15] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: MAD TEMP[0], IN[0].wwww, CONST[7], TEMP[0] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MUL TEMP[2].xy, CONST[13], IN[1].yyyy 9: MOV TEMP[0].xy, TEMP[2].xyxx 10: MAD TEMP[2].xy, IN[1].xxxx, CONST[12], TEMP[0] 11: MOV TEMP[0].xy, TEMP[2].xyxx 12: ADD TEMP[2].xy, TEMP[0], CONST[15] 13: MOV TEMP[2].xy, TEMP[2].xyxx 14: MUL TEMP[3].xy, CONST[9], IN[1].yyyy 15: MOV TEMP[0].xy, TEMP[3].xyxx 16: MAD TEMP[0].xy, IN[1].xxxx, CONST[8], TEMP[0] 17: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[11].xyxy 18: MOV TEMP[2].zw, TEMP[0].wwzw 19: MOV OUT[1], TEMP[2] 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = fmul float %33, %62 %72 = fmul float %34, %62 %73 = fmul float %35, %62 %74 = fmul float %36, %62 %75 = fmul float %61, %29 %76 = fadd float %75, %71 %77 = fmul float %61, %30 %78 = fadd float %77, %72 %79 = fmul float %61, %31 %80 = fadd float %79, %73 %81 = fmul float %61, %32 %82 = fadd float %81, %74 %83 = fmul float %63, %37 %84 = fadd float %83, %76 %85 = fmul float %63, %38 %86 = fadd float %85, %78 %87 = fmul float %63, %39 %88 = fadd float %87, %80 %89 = fmul float %63, %40 %90 = fadd float %89, %82 %91 = fmul float %64, %41 %92 = fadd float %91, %84 %93 = fmul float %64, %42 %94 = fadd float %93, %86 %95 = fmul float %64, %43 %96 = fadd float %95, %88 %97 = fmul float %64, %44 %98 = fadd float %97, %90 %99 = fmul float %94, %17 %100 = fmul float %94, %18 %101 = fmul float %94, %19 %102 = fmul float %94, %20 %103 = fmul float %92, %13 %104 = fadd float %103, %99 %105 = fmul float %92, %14 %106 = fadd float %105, %100 %107 = fmul float %92, %15 %108 = fadd float %107, %101 %109 = fmul float %92, %16 %110 = fadd float %109, %102 %111 = fmul float %96, %21 %112 = fadd float %111, %104 %113 = fmul float %96, %22 %114 = fadd float %113, %106 %115 = fmul float %96, %23 %116 = fadd float %115, %108 %117 = fmul float %96, %24 %118 = fadd float %117, %110 %119 = fmul float %98, %25 %120 = fadd float %119, %112 %121 = fmul float %98, %26 %122 = fadd float %121, %114 %123 = fmul float %98, %27 %124 = fadd float %123, %116 %125 = fmul float %98, %28 %126 = fadd float %125, %118 %127 = fmul float %53, %70 %128 = fmul float %54, %70 %129 = fmul float %69, %51 %130 = fadd float %129, %127 %131 = fmul float %69, %52 %132 = fadd float %131, %128 %133 = fadd float %130, %55 %134 = fadd float %132, %56 %135 = fmul float %47, %70 %136 = fmul float %48, %70 %137 = fmul float %69, %45 %138 = fadd float %137, %135 %139 = fmul float %69, %46 %140 = fadd float %139, %136 %141 = fadd float %138, %49 %142 = fadd float %140, %50 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %133, float %134, float %141, float %142) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %120, float %122, float %124, float %126) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x25 ; C2020125 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v2 ; 100A0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v1, v6, v5, 0, 0 ; D2820005 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2d ; C202012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x24 ; C2020124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x20 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v1, v7, v6, 0, 0 ; D2820006 041A0F01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2c ; C202012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v6 ; 060C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x35 ; C2020135 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v2 ; 100E0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x31 ; C2020131 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v1, v8, v7, 0, 0 ; D2820007 041E1101 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3d ; C202013D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v7 ; 060E0E04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x34 ; C2020134 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s4, v2 ; 10100404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x30 ; C2020130 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v1, v1, v9, v8, 0, 0 ; D2820001 04221301 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3c ; C202013C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 EXP 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MUL_F32_e32 v4, s4, v1 ; 10080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v2, v5, v4, 0, 0 ; D2820004 04120B02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v3, v5, v4, 0, 0 ; D2820004 04120B03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v1 ; 100A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v0, v6, v5, 0, 0 ; D2820005 04160D00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v3, v6, v5, 0, 0 ; D2820005 04160D03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v5 ; 100C0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v4, s4, v6, 0, 0 ; D2820006 04180904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v0, v8, v7, 0, 0 ; D2820007 041E1100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v2, v8, v7, 0, 0 ; D2820007 041E1102 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v3, v8, v7, 0, 0 ; D2820007 041E1103 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v7, s4, v6, 0, 0 ; D2820006 04180907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s4, v1 ; 10100204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v8, v0, v9, v8, 0, 0 ; D2820008 04221300 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v8, v2, v9, v8, 0, 0 ; D2820008 04221302 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v0, v3, v9, v8, 0, 0 ; D2820000 04221303 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v6, 0, 0 ; D2820001 04180900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v5 ; 10040A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v4, s4, v2, 0, 0 ; D2820002 04080904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v7, s4, v2, 0, 0 ; D2820002 04080907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v5 ; 10060A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v7, s4, v3, 0, 0 ; D2820003 040C0907 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v4, s4, v5, 0, 0 ; D2820004 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v7, s4, v4, 0, 0 ; D2820004 04100907 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s0, v4, 0, 0 ; D2820000 04100100 EXP 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.0010, 0.0000, -1.0000, -0.0000} IMM[1] FLT32 { 1.0000, -0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy 4: UIF TEMP[2].xxxx :0 5: MOV TEMP[2].x, IMM[0].yyyy 6: ELSE :0 7: MOV TEMP[2].x, IMM[0].zzzz 8: ENDIF 9: MOV TEMP[2].x, TEMP[2].xxxx 10: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 11: UIF TEMP[3].xxxx :0 12: MOV TEMP[3].x, IMM[0].yyyy 13: ELSE :0 14: MOV TEMP[3].x, IMM[0].zzzz 15: ENDIF 16: MOV TEMP[2].y, TEMP[3].xxxx 17: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 18: UIF TEMP[3].xxxx :0 19: MOV TEMP[3].x, IMM[0].yyyy 20: ELSE :0 21: MOV TEMP[3].x, IMM[0].zzzz 22: ENDIF 23: MOV TEMP[2].z, TEMP[3].xxxx 24: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 25: UIF TEMP[3].xxxx :0 26: ELSE :0 27: ENDIF 28: FSLT TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy 29: OR TEMP[3].x, TEMP[2].xxxx, TEMP[2].zzzz 30: OR TEMP[3].x, TEMP[3].xxxx, TEMP[2].yyyy 31: UIF TEMP[3].xxxx :0 32: KILL 33: ENDIF 34: MAD TEMP[1], TEMP[0].wwww, IMM[1].xxxy, IMM[1].yyyx 35: MUL TEMP[0], TEMP[0], TEMP[1] 36: MOV OUT[0], TEMP[0] 37: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = fadd float %38, 0xBF50624DE0000000 %40 = fcmp oge float %39, 0.000000e+00 %41 = sext i1 %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = icmp ne i32 %43, 0 %. = select i1 %44, float 0.000000e+00, float -1.000000e+00 %45 = fcmp oge float %39, 0.000000e+00 %46 = sext i1 %45 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = icmp ne i32 %48, 0 %temp12.0 = select i1 %49, float 0.000000e+00, float -1.000000e+00 %50 = fcmp oge float %39, 0.000000e+00 %51 = sext i1 %50 to i32 %52 = bitcast i32 %51 to float %53 = bitcast float %52 to i32 %54 = icmp ne i32 %53, 0 %.28 = select i1 %54, float 0.000000e+00, float -1.000000e+00 %55 = fcmp oge float %39, 0.000000e+00 %56 = sext i1 %55 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = icmp ne i32 %58, 0 %60 = fcmp olt float %., 0.000000e+00 %61 = sext i1 %60 to i32 %62 = fcmp olt float %temp12.0, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = fcmp olt float %.28, 0.000000e+00 %65 = sext i1 %64 to i32 %66 = bitcast i32 %61 to float %67 = bitcast i32 %63 to float %68 = bitcast i32 %65 to float %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = or i32 %69, %70 %72 = bitcast i32 %71 to float %73 = bitcast float %72 to i32 %74 = bitcast float %67 to i32 %75 = or i32 %73, %74 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = icmp ne i32 %77, 0 br i1 %78, label %IF26, label %ENDIF25 IF26: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF25 ENDIF25: ; preds = %main_body, %IF26 %79 = fmul float %38, 1.000000e+00 %80 = fadd float %79, -0.000000e+00 %81 = fmul float %38, 1.000000e+00 %82 = fadd float %81, -0.000000e+00 %83 = fmul float %38, 1.000000e+00 %84 = fadd float %83, -0.000000e+00 %85 = fmul float %38, -0.000000e+00 %86 = fadd float %85, 1.000000e+00 %87 = fmul float %35, %80 %88 = fmul float %36, %82 %89 = fmul float %37, %84 %90 = fmul float %38, %86 %91 = call i32 @llvm.SI.packf16(float %87, float %88) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %89, float %90) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %92, float %94, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 V_MOV_B32_e32 v4, -1.000000e-03 ; 7E0802FF BA83126F S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v4, v3, v4 ; 06080903 V_CMP_GE_F32_e64 s[0:1], v4, 0.000000e+00, 0, 0 ; D00C0000 00010104 V_CNDMASK_B32_e64 v4, -1.000000e+00, 0, s[0:1], 0, 0, 0, 0 ; D2000004 000100F3 V_CMP_LT_F32_e64 s[0:1], v4, 0.000000e+00, 0, 0 ; D0020000 00010104 V_CNDMASK_B32_e64 v4, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000004 00018280 V_OR_B32_e32 v4, v4, v4 ; 38080904 V_CMP_NE_I32_e64 s[0:1], v4, 0, 0, 0 ; D10A0000 00010104 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MUL_F32_e32 v4, v1, v3 ; 10080701 V_MUL_F32_e32 v5, v0, v3 ; 100A0700 V_CVT_PKRTZ_F16_F32_e32 v4, v5, v4 ; 5E080905 V_MUL_F32_e32 v5, v2, v3 ; 100A0702 V_CVT_PKRTZ_F16_F32_e32 v0, v5, v3 ; 5E000705 EXP 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_FRACT_F32_e32 v5, v1 ; 7E0A4101 V_SUB_F32_e32 v1, v5, v1 ; 08020305 V_MUL_F32_e32 v1, -4.000000e+00, v1 ; 100202F7 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v2, 4, v1 ; 34040284 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000302 V_ADD_I32_e32 v4, 1, v1 ; 4A080281 V_LSHLREV_B32_e32 v4, 4, v4 ; 34080884 BUFFER_LOAD_DWORD v5, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000504 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v7, v5 ; 10000B07 V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 V_ADD_I32_e32 v3, 2, v1 ; 4A060282 V_LSHLREV_B32_e32 v3, 4, v3 ; 34060684 BUFFER_LOAD_DWORD v5, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000503 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 V_ADD_I32_e32 v1, 3, v1 ; 4A020283 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 BUFFER_LOAD_DWORD v5, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000501 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v9, v5, v0, 0, 0 ; D2820000 04020B09 V_OR_B32_e32 v5, 12, v2 ; 380A048C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 V_OR_B32_e32 v10, 12, v4 ; 3814088C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MAD_F32 v5, v6, v5, v10, 0, 0 ; D2820005 042A0B06 V_OR_B32_e32 v10, 12, v3 ; 3814068C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v8, v10, v5, 0, 0 ; D2820005 04161508 V_OR_B32_e32 v10, 12, v1 ; 3814028C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v9, v10, v5, 0, 0 ; D2820005 04161509 V_OR_B32_e32 v10, 8, v2 ; 38140488 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A V_OR_B32_e32 v11, 8, v4 ; 38160888 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v11, v7, v11 ; 10161707 V_MAD_F32 v10, v6, v10, v11, 0, 0 ; D282000A 042E1506 V_OR_B32_e32 v11, 8, v3 ; 38160688 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v8, v11, v10, 0, 0 ; D282000A 042A1708 V_OR_B32_e32 v11, 8, v1 ; 38160288 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v9, v11, v10, 0, 0 ; D282000A 042A1709 V_OR_B32_e32 v2, 4, v2 ; 38040484 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_OR_B32_e32 v4, 4, v4 ; 38080884 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MAD_F32 v2, v6, v2, v4, 0, 0 ; D2820002 04120506 V_OR_B32_e32 v3, 4, v3 ; 38060684 BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v8, v3, v2, 0, 0 ; D2820002 040A0708 V_OR_B32_e32 v1, 4, v1 ; 38020284 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v9, v1, v2, 0, 0 ; D2820001 040A0309 EXP 15, 12, 0, 1, 0, v0, v1, v10, v5 ; F80008CF 050A0100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyyy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 0.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_CVT_PKRTZ_F16_F32_e64 v0, v2, 0.000000e+00, 0, 0 ; D25E0000 00010102 V_CVT_PKRTZ_F16_F32_e64 v1, 0.000000e+00, 1.000000e+00, 0, 0 ; D25E0001 0001E480 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_FRACT_F32_e32 v5, v1 ; 7E0A4101 V_SUB_F32_e32 v1, v5, v1 ; 08020305 V_MUL_F32_e32 v1, -4.000000e+00, v1 ; 100202F7 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v2, 4, v1 ; 34040284 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000302 V_ADD_I32_e32 v4, 1, v1 ; 4A080281 V_LSHLREV_B32_e32 v4, 4, v4 ; 34080884 BUFFER_LOAD_DWORD v5, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000504 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v7, v5 ; 10000B07 V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 V_ADD_I32_e32 v3, 2, v1 ; 4A060282 V_LSHLREV_B32_e32 v3, 4, v3 ; 34060684 BUFFER_LOAD_DWORD v5, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000503 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 V_ADD_I32_e32 v1, 3, v1 ; 4A020283 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 BUFFER_LOAD_DWORD v5, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000501 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v9, v5, v0, 0, 0 ; D2820000 04020B09 V_OR_B32_e32 v5, 12, v2 ; 380A048C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 V_OR_B32_e32 v10, 12, v4 ; 3814088C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MAD_F32 v5, v6, v5, v10, 0, 0 ; D2820005 042A0B06 V_OR_B32_e32 v10, 12, v3 ; 3814068C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v8, v10, v5, 0, 0 ; D2820005 04161508 V_OR_B32_e32 v10, 12, v1 ; 3814028C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v9, v10, v5, 0, 0 ; D2820005 04161509 V_OR_B32_e32 v10, 8, v2 ; 38140488 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A V_OR_B32_e32 v11, 8, v4 ; 38160888 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v11, v7, v11 ; 10161707 V_MAD_F32 v10, v6, v10, v11, 0, 0 ; D282000A 042E1506 V_OR_B32_e32 v11, 8, v3 ; 38160688 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v8, v11, v10, 0, 0 ; D282000A 042A1708 V_OR_B32_e32 v11, 8, v1 ; 38160288 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v9, v11, v10, 0, 0 ; D282000A 042A1709 V_OR_B32_e32 v2, 4, v2 ; 38040484 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_OR_B32_e32 v4, 4, v4 ; 38080884 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MAD_F32 v2, v6, v2, v4, 0, 0 ; D2820002 04120506 V_OR_B32_e32 v3, 4, v3 ; 38060684 BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v8, v3, v2, 0, 0 ; D2820002 040A0708 V_OR_B32_e32 v1, 4, v1 ; 38020284 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v9, v1, v2, 0, 0 ; D2820001 040A0309 EXP 15, 12, 0, 1, 0, v0, v1, v10, v5 ; F80008CF 050A0100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyxy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 1.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_CVT_PKRTZ_F16_F32_e64 v0, v2, 1.000000e+00, 0, 0 ; D25E0000 0001E502 V_CVT_PKRTZ_F16_F32_e64 v1, v2, 0.000000e+00, 0, 0 ; D25E0001 00010102 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, 1.0000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: ADD TEMP[0].xy, TEMP[1], CONST[8].zwzw 18: MUL TEMP[0].zw, TEMP[0].xyxy, CONST[8].xyxy 19: MOV TEMP[0].zw, TEMP[0].wwzw 20: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 21: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 22: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 23: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 24: RCP TEMP[3].x, TEMP[2].wwww 25: MUL TEMP[3].xy, TEMP[2], TEMP[3].xxxx 26: MOV TEMP[3].xy, TEMP[3].xyxx 27: MOV TEMP[4], TEMP[2] 28: MOV TEMP[0].xy, IN[1].xyxx 29: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 30: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 31: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 32: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 33: MOV TEMP[1].xyz, TEMP[1].xyzx 34: MAD TEMP[2], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 35: MOV TEMP[1].w, IMM[0].wwww 36: MOV TEMP[3].zw, IMM[0].wwyw 37: MOV OUT[1], TEMP[0] 38: MOV OUT[2], TEMP[2] 39: MOV OUT[0], TEMP[4] 40: MOV OUT[3], TEMP[1] 41: MOV OUT[4], TEMP[3] 42: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %74 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = fadd float %70, %82 %91 = fadd float %71, %83 %92 = fadd float %90, %72 %93 = fadd float %91, %73 %94 = fmul float %90, %49 %95 = fmul float %91, %50 %96 = fadd float %92, 5.000000e-01 %97 = fadd float %93, 5.000000e-01 %98 = fmul float %96, %52 %99 = fmul float %97, %53 %100 = bitcast float %98 to i32 %101 = bitcast float %99 to i32 %102 = bitcast float 0.000000e+00 to i32 %103 = insertelement <4 x i32> undef, i32 %100, i32 0 %104 = insertelement <4 x i32> %103, i32 %101, i32 1 %105 = insertelement <4 x i32> %104, i32 %102, i32 2 %106 = insertelement <4 x i32> %105, i32 undef, i32 3 %107 = bitcast <8 x i32> %75 to <32 x i8> %108 = bitcast <4 x i32> %77 to <16 x i8> %109 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2) %110 = extractelement <4 x float> %109, i32 3 %111 = fmul float %110, %51 %112 = fmul float %111, 2.550000e+02 %113 = fmul float %95, %58 %114 = fmul float %95, %59 %115 = fmul float %95, %60 %116 = fmul float %95, %61 %117 = fmul float %94, %54 %118 = fadd float %117, %113 %119 = fmul float %94, %55 %120 = fadd float %119, %114 %121 = fmul float %94, %56 %122 = fadd float %121, %115 %123 = fmul float %94, %57 %124 = fadd float %123, %116 %125 = fmul float %112, %62 %126 = fadd float %125, %118 %127 = fmul float %112, %63 %128 = fadd float %127, %120 %129 = fmul float %112, %64 %130 = fadd float %129, %122 %131 = fmul float %112, %65 %132 = fadd float %131, %124 %133 = fadd float %126, %66 %134 = fadd float %128, %67 %135 = fadd float %130, %68 %136 = fadd float %132, %69 %137 = fadd float %133, %47 %138 = fadd float %134, %48 %139 = fmul float %137, %45 %140 = fmul float %138, %46 %141 = fmul float %134, %17 %142 = fmul float %134, %18 %143 = fmul float %134, %19 %144 = fmul float %134, %20 %145 = fmul float %133, %13 %146 = fadd float %145, %141 %147 = fmul float %133, %14 %148 = fadd float %147, %142 %149 = fmul float %133, %15 %150 = fadd float %149, %143 %151 = fmul float %133, %16 %152 = fadd float %151, %144 %153 = fmul float %135, %21 %154 = fadd float %153, %146 %155 = fmul float %135, %22 %156 = fadd float %155, %148 %157 = fmul float %135, %23 %158 = fadd float %157, %150 %159 = fmul float %135, %24 %160 = fadd float %159, %152 %161 = fmul float %136, %25 %162 = fadd float %161, %154 %163 = fmul float %136, %26 %164 = fadd float %163, %156 %165 = fmul float %136, %27 %166 = fadd float %165, %158 %167 = fmul float %136, %28 %168 = fadd float %167, %160 %169 = fdiv float 1.000000e+00, %168 %170 = fmul float %162, %169 %171 = fmul float %164, %169 %172 = fmul float %134, %33 %173 = fmul float %134, %34 %174 = fmul float %134, %35 %175 = fmul float %134, %36 %176 = fmul float %133, %29 %177 = fadd float %176, %172 %178 = fmul float %133, %30 %179 = fadd float %178, %173 %180 = fmul float %133, %31 %181 = fadd float %180, %174 %182 = fmul float %133, %32 %183 = fadd float %182, %175 %184 = fmul float %135, %37 %185 = fadd float %184, %177 %186 = fmul float %135, %38 %187 = fadd float %186, %179 %188 = fmul float %135, %39 %189 = fadd float %188, %181 %190 = fmul float %135, %40 %191 = fadd float %190, %183 %192 = fmul float %136, %41 %193 = fadd float %192, %185 %194 = fmul float %136, %42 %195 = fadd float %194, %187 %196 = fmul float %136, %43 %197 = fadd float %196, %189 %198 = fmul float %136, %44 %199 = fadd float %198, %191 %200 = fmul float %193, 5.000000e-01 %201 = fadd float %200, 5.000000e-01 %202 = fmul float %195, -5.000000e-01 %203 = fadd float %202, 5.000000e-01 %204 = fmul float %197, 1.000000e+00 %205 = fadd float %204, 0.000000e+00 %206 = fmul float %199, 1.000000e+00 %207 = fadd float %206, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float %139, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %201, float %203, float %205, float %207) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %133, float %134, float %135, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %170, float %171, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[12:15], s[8:9], 0x0 ; C0860900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[12:15][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80030100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x3c ; C205013C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s10, v1 ; 060A020A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x24 ; C2050124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s10, v5 ; 100C0A0A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x3d ; C205013D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s10, v2 ; 0602040A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x25 ; C2050125 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s10, v1 ; 1004020A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x31 ; C2050131 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s10, v2 ; 1006040A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x2d ; C205012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v6, s10, v3, 0, 0 ; D2820003 040C1506 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x3f ; C205013F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s10, v1 ; 0602020A V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x29 ; C2050129 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s10, v1 ; 1010020A S_BUFFER_LOAD_DWORD s10, s[0:3], 0x3e ; C205013E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s10, v5 ; 06020A0A V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x28 ; C2050128 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s10, v1 ; 100E020A V_MOV_B32_e32 v9, 0 ; 7E120280 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v1, 8, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900800 00640107 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x26 ; C2020126 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MUL_F32_e32 v1, s4, v1 ; 10020204 V_MUL_F32_e32 v1, 2.550000e+02, v1 ; 100202FF 437F0000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x35 ; C2020135 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v1, s4, v3, 0, 0 ; D2820003 040C0901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x39 ; C2020139 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v3 ; 06080604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v4 ; 10080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x30 ; C2020130 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v2 ; 100A0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2c ; C202012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v6, s4, v5, 0, 0 ; D2820005 04140906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x34 ; C2020134 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v1, s4, v5, 0, 0 ; D2820005 04140901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x38 ; C2020138 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v5 ; 060E0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x20 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v7 ; 100E0E04 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[8:11], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010800 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v8, v9, v7, v4 ; F800020F 04070908 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v3 ; 10000604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v5, s4, v0, 0, 0 ; D2820000 04000905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x32 ; C2020132 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v2 ; 10080404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2e ; C202012E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v6, s4, v4, 0, 0 ; D2820004 04100906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x36 ; C2020136 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v1, s4, v4, 0, 0 ; D2820004 04100901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3a ; C202013A S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x33 ; C2020133 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v2 ; 10040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2f ; C202012F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v6, s4, v2, 0, 0 ; D2820002 04080906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x37 ; C2020137 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v1, s4, v2, 0, 0 ; D2820001 04080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3b ; C202013B S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v1, s4, v0, 0, 0 ; D2820000 04000901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v3 ; 10040604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v5, s4, v2, 0, 0 ; D2820002 04080905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v4, s4, v2, 0, 0 ; D2820002 04080904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v1, s4, v2, 0, 0 ; D2820002 04080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v3 ; 100C0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v5, s4, v6, 0, 0 ; D2820006 04180905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v4, s4, v6, 0, 0 ; D2820006 04180904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v1, s4, v6, 0, 0 ; D2820006 04180901 V_MAD_F32 v6, v6, -5.000000e-01, 5.000000e-01, 0, 0 ; D2820006 03C1E306 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v3 ; 100E0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v5, s4, v7, 0, 0 ; D2820007 041C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v4, s4, v7, 0, 0 ; D2820007 041C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v1, s4, v7, 0, 0 ; D2820007 041C0901 V_MAD_F32 v7, v7, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820007 03C1E107 EXP 15, 33, 0, 0, 0, v7, v6, v2, v0 ; F800021F 00020607 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v0, 1.000000e+00 ; 7E0002F2 EXP 15, 34, 0, 0, 0, v5, v3, v4, v0 ; F800022F 00040305 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v2, s4, v3 ; 10040604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v5, s4, v2, 0, 0 ; D2820002 04080905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v4, s4, v2, 0, 0 ; D2820002 04080904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v1, s4, v2, 0, 0 ; D2820002 04080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v3 ; 100C0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v5, s4, v6, 0, 0 ; D2820006 04180905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v4, s4, v6, 0, 0 ; D2820006 04180904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v1, s4, v6, 0, 0 ; D2820006 04180901 V_RCP_F32_e32 v7, v6 ; 7E0E5506 V_MUL_F32_e32 v8, v2, v7 ; 10100F02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v9, s4, v3 ; 10120604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, v5, s4, v9, 0, 0 ; D2820009 04240905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, v4, s4, v9, 0, 0 ; D2820009 04240904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, v1, s4, v9, 0, 0 ; D2820009 04240901 V_MUL_F32_e32 v7, v9, v7 ; 100E0F09 V_MOV_B32_e32 v10, 0.000000e+00 ; 7E140280 EXP 15, 35, 0, 0, 0, v7, v8, v10, v0 ; F800023F 000A0807 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v3 ; 10000604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v5, s4, v0, 0, 0 ; D2820000 04000905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xe ; C200010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v1, s0, v0, 0, 0 ; D2820000 04000101 EXP 15, 12, 0, 1, 0, v9, v2, v0, v6 ; F80008CF 06000209 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[0..8] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[0], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[8].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: MAD TEMP[2].x, IN[3].yyyy, IMM[0].zzzz, IMM[0].zzzz 16: MOV TEMP[2].x, TEMP[2].xxxx 17: MOV TEMP[2].y, CONST[8].wwww 18: MOV TEMP[3].xy, TEMP[2].xyyy 19: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 20: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 21: MOV TEMP[0].x, TEMP[1].xxxx 22: MUL TEMP[3].yzw, CONST[4].xxyw, IN[2].yyyy 23: MOV TEMP[0].yzw, TEMP[3].zyzw 24: MAD TEMP[3].yzw, IN[2].xxxx, CONST[3].xxyw, TEMP[0] 25: MOV TEMP[0].yzw, TEMP[3].zyzw 26: MAD TEMP[3].yzw, IN[2].zzzz, CONST[5].xxyw, TEMP[0] 27: MOV TEMP[0].yzw, TEMP[3].zyzw 28: ADD TEMP[3].yzw, TEMP[0], CONST[6].xxyw 29: MOV TEMP[0].yz, TEMP[3].zyzw 30: RCP TEMP[3].x, TEMP[3].wwww 31: MOV TEMP[0].w, TEMP[3].xxxx 32: MUL TEMP[3].yz, TEMP[3].xxxx, TEMP[0] 33: MOV TEMP[0].yz, TEMP[3].zyzz 34: MAD TEMP[3].yz, TEMP[0], IMM[0].zzwy, IMM[0].wwww 35: MOV TEMP[0].yz, TEMP[3].zyzz 36: MAD TEMP[0].xy, TEMP[0].yzzw, IMM[1].xyxx, IMM[1].zxzz 37: MOV TEMP[0].xy, TEMP[0].xyyy 38: TEX TEMP[0].x, TEMP[0], SAMP[3], 2D 39: MOV_SAT TEMP[0].x, TEMP[0].xxxx 40: MUL TEMP[0].y, TEMP[0].xxxx, CONST[1].xxxx 41: MAD TEMP[3].xy, IN[0], IMM[1].xyxx, IMM[1].zxzz 42: MOV TEMP[3].xy, TEMP[3].xyyy 43: TEX TEMP[3], TEMP[3], SAMP[0], 2D 44: MOV TEMP[2].w, TEMP[3] 45: MOV TEMP[4], -CONST[2].xxxx 46: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 47: UIF TEMP[5].xxxx :0 48: MOV TEMP[5].x, IMM[1].zzzz 49: ELSE :0 50: MOV TEMP[5].x, TEMP[3].xxxx 51: ENDIF 52: MOV TEMP[5].x, TEMP[5].xxxx 53: FSGE TEMP[6].x, TEMP[4].yyyy, IMM[1].zzzz 54: UIF TEMP[6].xxxx :0 55: MOV TEMP[6].x, IMM[1].zzzz 56: ELSE :0 57: MOV TEMP[6].x, TEMP[3].yyyy 58: ENDIF 59: MOV TEMP[5].y, TEMP[6].xxxx 60: FSGE TEMP[6].x, TEMP[4].zzzz, IMM[1].zzzz 61: UIF TEMP[6].xxxx :0 62: MOV TEMP[6].x, IMM[1].zzzz 63: ELSE :0 64: MOV TEMP[6].x, TEMP[3].zzzz 65: ENDIF 66: MOV TEMP[5].z, TEMP[6].xxxx 67: FSGE TEMP[3].x, TEMP[4].wwww, IMM[1].zzzz 68: UIF TEMP[3].xxxx :0 69: ELSE :0 70: ENDIF 71: MOV TEMP[2].xyz, TEMP[5].xyzx 72: MOV TEMP[3].xyz, TEMP[5].xyzz 73: TEX TEMP[3], TEMP[3], SAMP[7], 3D 74: LRP TEMP[0].xyz, TEMP[0].yyyy, TEMP[3], TEMP[2] 75: MUL TEMP[2], IN[1], IMM[1].xyxx 76: ADD TEMP[3].xy, TEMP[2], IMM[1].zxzz 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: TEX TEMP[3], TEMP[3], SAMP[1], 2D 79: ADD TEMP[4].y, -TEMP[3].wwww, IMM[0].yyyy 80: MAD TEMP[0].yzw, TEMP[0].xxyz, TEMP[4].yyyy, TEMP[3].xxyz 81: ADD TEMP[2].xy, TEMP[2], IMM[1].zxzz 82: MOV TEMP[2].xy, TEMP[2].xyyy 83: TEX TEMP[2].xzw, TEMP[2], SAMP[4], 2D 84: MOV TEMP[3].w, TEMP[2].wwww 85: MOV TEMP[4].xy, IN[0].zwww 86: TEX TEMP[4], TEMP[4], SAMP[5], 2D 87: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].yzww, TEMP[4] 88: MOV TEMP[4].xyz, TEMP[0].xyzx 89: MAD TEMP[2].y, TEMP[2].zzzz, -TEMP[2].xxxx, TEMP[2].xxxx 90: MOV TEMP[0].xyz, TEMP[0].xyzz 91: TEX TEMP[0], TEMP[0], SAMP[6], 3D 92: LRP TEMP[0].xyz, TEMP[2].yyyy, TEMP[0], TEMP[4] 93: MOV TEMP[3].xyz, TEMP[0].xyzx 94: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[7].xxyz 95: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[3] 96: MOV TEMP[0].xyz, TEMP[0].xyzx 97: MOV TEMP[0].w, IMM[0].yyyy 98: MOV OUT[0], TEMP[0] 99: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %47 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %48 = load <8 x i32> addrspace(2)* %47, !tbaa !0 %49 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %50 = load <4 x i32> addrspace(2)* %49, !tbaa !0 %51 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %52 = load <8 x i32> addrspace(2)* %51, !tbaa !0 %53 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %54 = load <4 x i32> addrspace(2)* %53, !tbaa !0 %55 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %56 = load <8 x i32> addrspace(2)* %55, !tbaa !0 %57 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %58 = load <4 x i32> addrspace(2)* %57, !tbaa !0 %59 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = fsub float -0.000000e+00, %24 %90 = fadd float %89, %85 %91 = fsub float -0.000000e+00, %25 %92 = fadd float %91, %86 %93 = fsub float -0.000000e+00, %26 %94 = fadd float %93, %87 %95 = fmul float %94, %44 %96 = fmul float %95, 0x3FF7154CA0000000 %97 = call float @llvm.AMDIL.exp.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %98, 1.000000e+00 %100 = fmul float %90, %90 %101 = fmul float %92, %92 %102 = fadd float %101, %100 %103 = fmul float %94, %94 %104 = fadd float %102, %103 %105 = fdiv float 1.000000e+00, %94 %106 = fmul float %104, %45 %107 = fmul float %99, %106 %108 = fmul float %105, %107 %109 = fmul float %108, 0x3FF7154CA0000000 %110 = call float @llvm.AMDIL.exp.(float %109) %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %112 = fsub float -0.000000e+00, %111 %113 = fadd float %112, 1.000000e+00 %114 = fmul float %88, 5.000000e-01 %115 = fadd float %114, 5.000000e-01 %116 = bitcast float %115 to i32 %117 = bitcast float %46 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %56 to <32 x i8> %121 = bitcast <4 x i32> %58 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = fmul float %113, %123 %125 = fmul float %32, %86 %126 = fmul float %33, %86 %127 = fmul float %34, %86 %128 = fmul float %85, %29 %129 = fadd float %128, %125 %130 = fmul float %85, %30 %131 = fadd float %130, %126 %132 = fmul float %85, %31 %133 = fadd float %132, %127 %134 = fmul float %87, %35 %135 = fadd float %134, %129 %136 = fmul float %87, %36 %137 = fadd float %136, %131 %138 = fmul float %87, %37 %139 = fadd float %138, %133 %140 = fadd float %135, %38 %141 = fadd float %137, %39 %142 = fadd float %139, %40 %143 = fdiv float 1.000000e+00, %142 %144 = fmul float %143, %140 %145 = fmul float %143, %141 %146 = fmul float %144, 5.000000e-01 %147 = fadd float %146, -5.000000e-01 %148 = fmul float %145, -5.000000e-01 %149 = fadd float %148, -5.000000e-01 %150 = fmul float %147, 1.000000e+00 %151 = fadd float %150, 0.000000e+00 %152 = fmul float %149, -1.000000e+00 %153 = fadd float %152, 1.000000e+00 %154 = bitcast float %151 to i32 %155 = bitcast float %153 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = bitcast <8 x i32> %60 to <32 x i8> %159 = bitcast <4 x i32> %62 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %162, %27 %164 = fmul float %79, 1.000000e+00 %165 = fadd float %164, 0.000000e+00 %166 = fmul float %80, -1.000000e+00 %167 = fadd float %166, 1.000000e+00 %168 = bitcast float %165 to i32 %169 = bitcast float %167 to i32 %170 = insertelement <2 x i32> undef, i32 %168, i32 0 %171 = insertelement <2 x i32> %170, i32 %169, i32 1 %172 = bitcast <8 x i32> %48 to <32 x i8> %173 = bitcast <4 x i32> %50 to <16 x i8> %174 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = fsub float -0.000000e+00, %28 %179 = fsub float -0.000000e+00, %28 %180 = fsub float -0.000000e+00, %28 %181 = fsub float -0.000000e+00, %28 %182 = fcmp oge float %178, 0.000000e+00 %183 = sext i1 %182 to i32 %184 = bitcast i32 %183 to float %185 = bitcast float %184 to i32 %186 = icmp ne i32 %185, 0 %. = select i1 %186, float 0.000000e+00, float %175 %187 = fcmp oge float %179, 0.000000e+00 %188 = sext i1 %187 to i32 %189 = bitcast i32 %188 to float %190 = bitcast float %189 to i32 %191 = icmp ne i32 %190, 0 %temp24.0 = select i1 %191, float 0.000000e+00, float %176 %192 = fcmp oge float %180, 0.000000e+00 %193 = sext i1 %192 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = icmp ne i32 %195, 0 %.37 = select i1 %196, float 0.000000e+00, float %177 %197 = fcmp oge float %181, 0.000000e+00 %198 = sext i1 %197 to i32 %199 = bitcast i32 %198 to float %200 = bitcast float %199 to i32 %201 = icmp ne i32 %200, 0 %202 = bitcast float %. to i32 %203 = bitcast float %temp24.0 to i32 %204 = bitcast float %.37 to i32 %205 = insertelement <4 x i32> undef, i32 %202, i32 0 %206 = insertelement <4 x i32> %205, i32 %203, i32 1 %207 = insertelement <4 x i32> %206, i32 %204, i32 2 %208 = insertelement <4 x i32> %207, i32 undef, i32 3 %209 = bitcast <8 x i32> %76 to <32 x i8> %210 = bitcast <4 x i32> %78 to <16 x i8> %211 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 3) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = call float @llvm.AMDGPU.lrp(float %163, float %212, float %.) %216 = call float @llvm.AMDGPU.lrp(float %163, float %213, float %temp24.0) %217 = call float @llvm.AMDGPU.lrp(float %163, float %214, float %.37) %218 = fmul float %83, 1.000000e+00 %219 = fmul float %84, -1.000000e+00 %220 = fadd float %218, 0.000000e+00 %221 = fadd float %219, 1.000000e+00 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %52 to <32 x i8> %227 = bitcast <4 x i32> %54 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fsub float -0.000000e+00, %232 %234 = fadd float %233, 1.000000e+00 %235 = fmul float %215, %234 %236 = fadd float %235, %229 %237 = fmul float %216, %234 %238 = fadd float %237, %230 %239 = fmul float %217, %234 %240 = fadd float %239, %231 %241 = fadd float %218, 0.000000e+00 %242 = fadd float %219, 1.000000e+00 %243 = bitcast float %241 to i32 %244 = bitcast float %242 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = bitcast <8 x i32> %64 to <32 x i8> %248 = bitcast <4 x i32> %66 to <16 x i8> %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 2 %252 = bitcast float %81 to i32 %253 = bitcast float %82 to i32 %254 = insertelement <2 x i32> undef, i32 %252, i32 0 %255 = insertelement <2 x i32> %254, i32 %253, i32 1 %256 = bitcast <8 x i32> %68 to <32 x i8> %257 = bitcast <4 x i32> %70 to <16 x i8> %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = call float @llvm.AMDGPU.lrp(float %250, float %236, float %259) %263 = call float @llvm.AMDGPU.lrp(float %250, float %238, float %260) %264 = call float @llvm.AMDGPU.lrp(float %250, float %240, float %261) %265 = fsub float -0.000000e+00, %250 %266 = fmul float %251, %265 %267 = fadd float %266, %250 %268 = bitcast float %262 to i32 %269 = bitcast float %263 to i32 %270 = bitcast float %264 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = bitcast <8 x i32> %72 to <32 x i8> %276 = bitcast <4 x i32> %74 to <16 x i8> %277 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 3) %278 = extractelement <4 x float> %277, i32 0 %279 = extractelement <4 x float> %277, i32 1 %280 = extractelement <4 x float> %277, i32 2 %281 = call float @llvm.AMDGPU.lrp(float %267, float %278, float %262) %282 = call float @llvm.AMDGPU.lrp(float %267, float %279, float %263) %283 = call float @llvm.AMDGPU.lrp(float %267, float %280, float %264) %284 = fsub float -0.000000e+00, %281 %285 = fadd float %284, %41 %286 = fsub float -0.000000e+00, %282 %287 = fadd float %286, %42 %288 = fsub float -0.000000e+00, %283 %289 = fadd float %288, %43 %290 = fmul float %124, %285 %291 = fadd float %290, %281 %292 = fmul float %124, %287 %293 = fadd float %292, %282 %294 = fmul float %124, %289 %295 = fadd float %294, %283 %296 = call i32 @llvm.SI.packf16(float %291, float %293) %297 = bitcast i32 %296 to float %298 = call i32 @llvm.SI.packf16(float %295, float 1.000000e+00) %299 = bitcast i32 %298 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 2, [m0] ; C8080800 V_INTERP_P2_F32 v2, [v2], v1, 0, 2, [m0] ; C8090801 V_INTERP_P1_F32 v3, v0, 1, 2, [m0] ; C80C0900 V_INTERP_P2_F32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x10 ; C2040110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s8, v3 ; 10080608 S_BUFFER_LOAD_DWORD s8, s[0:3], 0xc ; C204010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v2, s8, v4, 0, 0 ; D2820004 04101102 V_INTERP_P1_F32 v5, v0, 2, 2, [m0] ; C8140A00 V_INTERP_P2_F32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x14 ; C2040114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v5, s8, v4, 0, 0 ; D2820004 04101105 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x18 ; C2040118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s8, v4 ; 06080808 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x13 ; C2040113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s8, v3 ; 100C0608 S_BUFFER_LOAD_DWORD s8, s[0:3], 0xf ; C204010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v2, s8, v6, 0, 0 ; D2820006 04181102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x17 ; C2040117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v5, s8, v6, 0, 0 ; D2820006 04181105 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1b ; C204011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s8, v6 ; 060C0C08 V_RCP_F32_e32 v6, v6 ; 7E0C5506 V_MUL_F32_e32 v4, v6, v4 ; 10080906 V_MAD_F32 v7, v4, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820007 03C5E104 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x11 ; C2040111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s8, v3 ; 10080608 S_BUFFER_LOAD_DWORD s8, s[0:3], 0xd ; C204010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v2, s8, v4, 0, 0 ; D2820004 04101102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x15 ; C2040115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v5, s8, v4, 0, 0 ; D2820004 04101105 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x19 ; C2040119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s8, v4 ; 06080808 V_MUL_F32_e32 v4, v6, v4 ; 10080906 V_MAD_F32 v4, v4, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820004 03C5E304 V_SUB_F32_e32 v8, 1.000000e+00, v4 ; 081008F2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0xc ; C086050C S_LOAD_DWORDX8 s[16:23], s[6:7], 0x18 ; C0C80718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v4, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800100 00640407 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v4, v4, 0, 1, 0 ; D2060804 00010104 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x4 ; C2040104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s8, v4 ; 10080808 V_SUB_F32_e32 v6, 1.000000e+00, v4 ; 080C08F2 V_INTERP_P1_F32 v7, v0, 0, 0, [m0] ; C81C0000 V_INTERP_P2_F32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 V_INTERP_P1_F32 v9, v0, 1, 0, [m0] ; C8240100 V_INTERP_P2_F32 v9, [v9], v1, 1, 0, [m0] ; C8250101 V_SUB_F32_e32 v8, 1.000000e+00, v9 ; 081012F2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800700 00640707 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x8 ; C2040108 V_MOV_B32_e32 v10, 0x80000000 ; 7E1402FF 80000000 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_XOR_B32_e32 v10, s8, v10 ; 3A141408 V_CMP_GE_F32_e64 s[10:11], v10, 0.000000e+00, 0, 0 ; D00C000A 0001010A V_CNDMASK_B32_e64 v12, v9, 0, s[10:11], 0, 0, 0, 0 ; D200000C 00290109 V_MUL_F32_e32 v14, v6, v12 ; 101C1906 V_CNDMASK_B32_e64 v11, v8, 0, s[10:11], 0, 0, 0, 0 ; D200000B 00290108 V_CNDMASK_B32_e64 v10, v7, 0, s[10:11], 0, 0, 0, 0 ; D200000A 00290107 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x1c ; C086051C S_LOAD_DWORDX8 s[16:23], s[6:7], 0x38 ; C0C80738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0800700 0064070A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v4, v9, v14, 0, 0 ; D282000E 043A1304 V_INTERP_P1_F32 v15, v0, 0, 1, [m0] ; C83C0400 V_INTERP_P2_F32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 V_INTERP_P1_F32 v17, v0, 1, 1, [m0] ; C8440500 V_INTERP_P2_F32 v17, [v17], v1, 1, 1, [m0] ; C8450501 V_SUB_F32_e32 v16, 1.000000e+00, v17 ; 082022F2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x4 ; C0860504 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x8 ; C0C80708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800F00 0064110F S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v21, 1.000000e+00, v20 ; 082A28F2 V_MAD_F32 v14, v14, v21, v19, 0, 0 ; D282000E 044E2B0E V_INTERP_P1_F32 v23, v0, 3, 0, [m0] ; C85C0300 V_INTERP_P2_F32 v23, [v23], v1, 3, 0, [m0] ; C85D0301 V_INTERP_P1_F32 v22, v0, 2, 0, [m0] ; C8580200 V_INTERP_P2_F32 v22, [v22], v1, 2, 0, [m0] ; C8590201 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x14 ; C0860514 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x28 ; C0C80728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00641616 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x10 ; C0860510 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x20 ; C0C80720 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[15:16], 5, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800500 00640F0F S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v25, 1.000000e+00, v15 ; 08321EF2 V_MUL_F32_e32 v26, v25, v24 ; 10343119 V_MAD_F32 v28, v15, v14, v26, 0, 0 ; D282001C 046A1D0F V_MUL_F32_e32 v14, v6, v11 ; 101C1706 V_MAD_F32 v14, v4, v8, v14, 0, 0 ; D282000E 043A1104 V_MAD_F32 v14, v14, v21, v18, 0, 0 ; D282000E 044A2B0E V_MUL_F32_e32 v30, v25, v23 ; 103C2F19 V_MAD_F32 v27, v15, v14, v30, 0, 0 ; D282001B 047A1D0F V_MUL_F32_e32 v6, v6, v10 ; 100C1506 V_MAD_F32 v4, v4, v7, v6, 0, 0 ; D2820004 041A0F04 V_MAD_F32 v4, v4, v21, v17, 0, 0 ; D2820004 04462B04 V_MUL_F32_e32 v6, v25, v22 ; 100C2D19 V_MAD_F32 v26, v15, v4, v6, 0, 0 ; D282001A 041A090F S_LOAD_DWORDX4 s[12:15], s[4:5], 0x18 ; C0860518 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x30 ; C0C80730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[12:15] ; F0800700 0064061A V_MUL_F32_e32 v4, v16, v15 ; 10081F10 V_SUB_F32_e32 v4, v15, v4 ; 0808090F V_SUB_F32_e32 v9, 1.000000e+00, v4 ; 081208F2 V_MUL_F32_e32 v10, v9, v27 ; 10143709 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v4, v7, v10, 0, 0 ; D282000A 042A0F04 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1d ; C204011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v11, s8, v10 ; 08161408 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1 ; C2040101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v3, s8, v3 ; 0A060608 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v2, s8, v2 ; 0A040408 V_MUL_F32_e32 v2, v2, v2 ; 10040502 V_MAD_F32 v2, v3, v3, v2, 0, 0 ; D2820002 040A0703 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x2 ; C2040102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v3, s8, v5 ; 0A060A08 V_MAD_F32 v2, v3, v3, v2, 0, 0 ; D2820002 040A0703 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x21 ; C2040121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s8, v2 ; 10040408 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x20 ; C2040120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s8, v3 ; 100A0608 V_MUL_F32_e32 v5, 1.442700e+00, v5 ; 100A0AFF 3FB8AA65 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_SUB_F32_e32 v5, 1.000000e+00, v5 ; 080A0AF2 V_MUL_F32_e32 v2, v5, v2 ; 10040505 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MUL_F32_e32 v2, v3, v2 ; 10040503 V_MUL_F32_e32 v2, 1.442700e+00, v2 ; 100404FF 3FB8AA65 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_ADD_F32_e64 v2, v2, 0, 1, 0 ; D2060802 00010102 V_SUB_F32_e32 v2, 1.000000e+00, v2 ; 080404F2 V_INTERP_P1_F32 v3, v0, 1, 3, [m0] ; C80C0D00 V_INTERP_P2_F32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 V_MAD_F32 v0, v3, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820000 03C1E103 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x23 ; C2040123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s8 ; 7E020208 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x8 ; C0840508 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x10 ; C0C60710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[8:11] ; F0800100 00430000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v2, v0 ; 10000102 V_MAD_F32 v1, v0, v11, v10, 0, 0 ; D2820001 042A1700 V_MUL_F32_e32 v2, v9, v26 ; 10043509 V_MAD_F32 v2, v4, v6, v2, 0, 0 ; D2820002 040A0D04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v3, s4, v2 ; 08060404 V_MAD_F32 v2, v0, v3, v2, 0, 0 ; D2820002 040A0700 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 V_MUL_F32_e32 v2, v9, v28 ; 10043909 V_MAD_F32 v2, v4, v8, v2, 0, 0 ; D2820002 040A1104 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1e ; C200011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v3, s0, v2 ; 08060400 V_MAD_F32 v0, v0, v3, v2, 0, 0 ; D2820000 040A0700 V_CVT_PKRTZ_F16_F32_e64 v0, v0, 1.000000e+00, 0, 0 ; D25E0000 0001E500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..10] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, -1.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[8].xxxx, CONST[9].xxxx 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1], CONST[8].yyyy, IMM[0].xyyx, IMM[0].yxzy 3: MUL TEMP[2].xy, TEMP[1], IN[0] 4: MOV TEMP[1].xy, TEMP[2].xyxx 5: MAD TEMP[1].xy, TEMP[1], TEMP[1].zwzw, CONST[9].xxxx 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: MAD TEMP[2].xy, IN[0], -CONST[8].zzzz, CONST[9].xxxx 8: MOV TEMP[2].xy, TEMP[2].xyxx 9: MAD TEMP[3], CONST[8].ywwy, IMM[0].yxxy, IMM[0].xyyz 10: MUL TEMP[4].xy, TEMP[3], IN[0] 11: MOV TEMP[3].xy, TEMP[4].xyxx 12: MAD TEMP[4].xy, TEMP[3], TEMP[3].zwzw, CONST[9].xxxx 13: MOV TEMP[4].xy, TEMP[4].xyxx 14: MUL TEMP[5].xy, CONST[5], IN[0].yyyy 15: MOV TEMP[3].xy, TEMP[5].xyxx 16: MAD TEMP[5].xy, IN[0].xxxx, CONST[4], TEMP[3] 17: MOV TEMP[3].xy, TEMP[5].xyxx 18: MAD TEMP[5].xy, CONST[10].xxxx, CONST[6], TEMP[3] 19: MOV TEMP[3].xy, TEMP[5].xyxx 20: ADD TEMP[5].xy, TEMP[3], CONST[7] 21: MOV TEMP[3].xy, TEMP[5].xyxx 22: MAD TEMP[5].xy, TEMP[3], IMM[1].xyzz, IMM[0].wwww 23: MOV TEMP[5].xy, TEMP[5].xyxx 24: MUL TEMP[3], CONST[1], IN[0].yyyy 25: MAD TEMP[3], IN[0].xxxx, CONST[0], TEMP[3] 26: MAD TEMP[3], CONST[10].xxxx, CONST[2], TEMP[3] 27: ADD TEMP[3], TEMP[3], CONST[3] 28: MOV TEMP[6].xyz, TEMP[3].xywx 29: MOV TEMP[7].xy, IN[0].xyxx 30: MOV TEMP[7].z, CONST[10].xxxx 31: MOV TEMP[5].zw, IN[0].wwzw 32: MOV TEMP[0].zw, IMM[0].xxyx 33: MOV TEMP[1].zw, IMM[0].xxyx 34: MOV TEMP[2].zw, IMM[0].xxyx 35: MOV TEMP[4].zw, IMM[0].xxyx 36: MOV TEMP[7].w, IMM[0].xxxx 37: MOV TEMP[6].w, IMM[0].xxxx 38: MOV OUT[6], TEMP[6] 39: MOV OUT[7], TEMP[5] 40: MOV OUT[1], TEMP[0] 41: MOV OUT[0], TEMP[3] 42: MOV OUT[2], TEMP[1] 43: MOV OUT[3], TEMP[2] 44: MOV OUT[4], TEMP[4] 45: MOV OUT[5], TEMP[7] 46: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %37 %52 = fadd float %51, %41 %53 = fmul float %48, %37 %54 = fadd float %53, %41 %55 = fmul float %38, 1.000000e+00 %56 = fadd float %55, 0.000000e+00 %57 = fmul float %38, 0.000000e+00 %58 = fadd float %57, 1.000000e+00 %59 = fmul float %38, 0.000000e+00 %60 = fadd float %59, -1.000000e+00 %61 = fmul float %38, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %56, %47 %64 = fmul float %58, %48 %65 = fmul float %63, %60 %66 = fadd float %65, %41 %67 = fmul float %64, %62 %68 = fadd float %67, %41 %69 = fsub float -0.000000e+00, %39 %70 = fmul float %47, %69 %71 = fadd float %70, %41 %72 = fsub float -0.000000e+00, %39 %73 = fmul float %48, %72 %74 = fadd float %73, %41 %75 = fmul float %38, 0.000000e+00 %76 = fadd float %75, 1.000000e+00 %77 = fmul float %40, 1.000000e+00 %78 = fadd float %77, 0.000000e+00 %79 = fmul float %40, 1.000000e+00 %80 = fadd float %79, 0.000000e+00 %81 = fmul float %38, 0.000000e+00 %82 = fadd float %81, -1.000000e+00 %83 = fmul float %76, %47 %84 = fmul float %78, %48 %85 = fmul float %83, %80 %86 = fadd float %85, %41 %87 = fmul float %84, %82 %88 = fadd float %87, %41 %89 = fmul float %31, %48 %90 = fmul float %32, %48 %91 = fmul float %47, %29 %92 = fadd float %91, %89 %93 = fmul float %47, %30 %94 = fadd float %93, %90 %95 = fmul float %42, %33 %96 = fadd float %95, %92 %97 = fmul float %42, %34 %98 = fadd float %97, %94 %99 = fadd float %96, %35 %100 = fadd float %98, %36 %101 = fmul float %99, 5.000000e-01 %102 = fadd float %101, -5.000000e-01 %103 = fmul float %100, -5.000000e-01 %104 = fadd float %103, -5.000000e-01 %105 = fmul float %17, %48 %106 = fmul float %18, %48 %107 = fmul float %19, %48 %108 = fmul float %20, %48 %109 = fmul float %47, %13 %110 = fadd float %109, %105 %111 = fmul float %47, %14 %112 = fadd float %111, %106 %113 = fmul float %47, %15 %114 = fadd float %113, %107 %115 = fmul float %47, %16 %116 = fadd float %115, %108 %117 = fmul float %42, %21 %118 = fadd float %117, %110 %119 = fmul float %42, %22 %120 = fadd float %119, %112 %121 = fmul float %42, %23 %122 = fadd float %121, %114 %123 = fmul float %42, %24 %124 = fadd float %123, %116 %125 = fadd float %118, %25 %126 = fadd float %120, %26 %127 = fadd float %122, %27 %128 = fadd float %124, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %54, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %66, float %68, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %71, float %74, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %47, float %48, float %42, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %125, float %126, float %128, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %102, float %104, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %126, float %127, float %128) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x24 ; C2020124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x20 ; C2028120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v6, v1, v5, v4, 0, 0 ; D2820006 04120B01 V_MAD_F32 v5, v0, v5, v4, 0, 0 ; D2820005 04120B00 V_MOV_B32_e32 v7, 1.000000e+00 ; 7E0E02F2 V_MOV_B32_e32 v8, 0.000000e+00 ; 7E100280 EXP 15, 32, 0, 0, 0, v5, v6, v8, v7 ; F800020F 07080605 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x21 ; C2028121 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v5, v1, v5, v4, 0, 0 ; D2820005 04120B01 V_MUL_F32_e32 v6, s5, v0 ; 100C0005 V_SUB_F32_e32 v6, s4, v6 ; 080C0C04 EXP 15, 33, 0, 0, 0, v6, v5, v8, v7 ; F800021F 07080506 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x22 ; C2028122 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MUL_F32_e32 v6, v1, v5 ; 100C0B01 V_SUB_F32_e32 v6, s4, v6 ; 080C0C04 V_MUL_F32_e32 v5, v0, v5 ; 100A0B00 V_SUB_F32_e32 v5, s4, v5 ; 080A0A04 EXP 15, 34, 0, 0, 0, v5, v6, v8, v7 ; F800022F 07080605 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x23 ; C2028123 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 V_MUL_F32_e32 v5, s5, v1 ; 100A0205 V_SUB_F32_e32 v5, s4, v5 ; 080A0A04 EXP 15, 35, 0, 0, 0, v4, v5, v8, v7 ; F800023F 07080504 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x28 ; C2020128 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MOV_B32_e32 v4, s4 ; 7E080204 EXP 15, 36, 0, 0, 0, v0, v1, v4, v7 ; F800024F 07040100 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x7 ; C2028107 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v4, s5, v1 ; 10080205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x3 ; C2028103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xb ; C202810B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v4, s4, v5, v4, 0, 0 ; D2820004 04120A04 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xf ; C202810F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s5, v4 ; 06080805 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x5 ; C2028105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s5, v1 ; 100A0205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x1 ; C2028101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s5 ; 7E0C0205 V_MAD_F32 v5, v0, v6, v5, 0, 0 ; D2820005 04160D00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x9 ; C2028109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s5 ; 7E0C0205 V_MAD_F32 v5, s4, v6, v5, 0, 0 ; D2820005 04160C04 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xd ; C202810D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s5, v5 ; 060A0A05 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x4 ; C2028104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s5, v1 ; 100C0205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x0 ; C2028100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s5 ; 7E100205 V_MAD_F32 v6, v0, v8, v6, 0, 0 ; D2820006 041A1100 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x8 ; C2028108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s5 ; 7E100205 V_MAD_F32 v6, s4, v8, v6, 0, 0 ; D2820006 041A1004 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xc ; C202810C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s5, v6 ; 060C0C05 EXP 15, 37, 0, 0, 0, v6, v5, v4, v7 ; F800025F 07040506 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x15 ; C2028115 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v7, s5, v1 ; 100E0205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s5 ; 7E100205 V_MAD_F32 v7, v0, v8, v7, 0, 0 ; D2820007 041E1100 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x19 ; C2028119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s5 ; 7E100205 V_MAD_F32 v7, s4, v8, v7, 0, 0 ; D2820007 041E1004 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x1d ; C202811D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s5, v7 ; 060E0E05 V_MAD_F32 v7, v7, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820007 03C5E307 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x14 ; C2028114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s5, v1 ; 10100205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x10 ; C2028110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s5 ; 7E120205 V_MAD_F32 v8, v0, v9, v8, 0, 0 ; D2820008 04221300 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x18 ; C2028118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s5 ; 7E120205 V_MAD_F32 v8, s4, v9, v8, 0, 0 ; D2820008 04221204 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x1c ; C202811C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v8, s5, v8 ; 06101005 V_MAD_F32 v8, v8, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820008 03C5E108 EXP 15, 38, 0, 0, 0, v8, v7, v2, v3 ; F800026F 03020708 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x6 ; C2028106 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v7, s5, v1 ; 100E0205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x2 ; C2028102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s5 ; 7E100205 V_MAD_F32 v0, v0, v8, v7, 0, 0 ; D2820000 041E1100 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xa ; C202810A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_MAD_F32 v0, s4, v1, v0, 0, 0 ; D2820000 04020204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xe ; C200010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v6, v5, v0, v4 ; F80008CF 04000506 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL CONST[0..23] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 4.0000, 2.2000, 1.0000, 2.0000} IMM[2] FLT32 { -1.0000, 1.0000, 4.0000, -2.0000} IMM[3] FLT32 { 0.0000, 0.5000, -16.0000, -1.4427} IMM[4] FLT32 { 16.0000, -1.4427, 0.0050, -0.5000} IMM[5] FLT32 { 0.5000, -0.5000, 6.0000, 0.2500} IMM[6] FLT32 { 2.0000, -2.0000, 6.0000, 2.2000} IMM[7] FLT32 { -0.6700, 0.4545, -0.4000, 0.4000} IMM[8] FLT32 { 1.4427, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[8], 2D 3: MOV TEMP[1].xz, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].wwww 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].wwww 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].wwww 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 34: UIF TEMP[4].xxxx :0 35: KILL 36: ENDIF 37: MUL TEMP[3].yw, CONST[18].xxzy, IN[4].yyyy 38: MOV TEMP[1].yw, TEMP[3].wyww 39: MAD TEMP[3].yw, IN[4].xxxx, CONST[17].xxzy, TEMP[1] 40: MOV TEMP[1].yw, TEMP[3].wyww 41: MAD TEMP[3].yw, IN[4].zzzz, CONST[19].xxzy, TEMP[1] 42: MOV TEMP[1].yw, TEMP[3].wyww 43: ADD TEMP[3].yw, TEMP[1], CONST[20].xxzy 44: MOV TEMP[1].yw, TEMP[3].wyww 45: MOV TEMP[3].xy, IN[6].zwww 46: TEX TEMP[3].yzw, TEMP[3], SAMP[5], 2D 47: MOV TEMP[2].w, TEMP[3].zyzw 48: MUL TEMP[3], TEMP[3].wwww, IMM[1].xxxx 49: MOV_SAT TEMP[3], TEMP[3] 50: MOV TEMP[2].x, TEMP[3].xxxx 51: ADD TEMP[4].x, -TEMP[3].xxxx, IMM[0].xxxx 52: MOV TEMP[4].x, TEMP[4].xxxx 53: MOV TEMP[4].y, IMM[0].zzzz 54: MOV TEMP[5].xy, TEMP[4].xyyy 55: MOV TEMP[5].w, IMM[0].zzzz 56: TXL TEMP[5], TEMP[5], SAMP[4], 2D 57: POW TEMP[6].x, TEMP[5].xxxx, IMM[1].yyyy 58: POW TEMP[6].y, TEMP[5].yyyy, IMM[1].yyyy 59: POW TEMP[6].z, TEMP[5].zzzz, IMM[1].yyyy 60: POW TEMP[6].w, TEMP[5].wwww, IMM[1].zzzz 61: MOV TEMP[4].w, TEMP[6] 62: MAD TEMP[5].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 63: MOV TEMP[5].xy, TEMP[5].xyyy 64: TEX TEMP[5].xyz, TEMP[5], SAMP[3], 2D 65: MAD TEMP[7].yw, TEMP[5].xxzy, IMM[1].wwww, IMM[0].yyyy 66: DP2 TEMP[5].x, IMM[0].xyyy, TEMP[7].ywww 67: DP2 TEMP[7].x, IMM[0].yyyy, TEMP[7].ywww 68: MOV TEMP[5].y, TEMP[7].xxxx 69: MOV TEMP[5].zw, -TEMP[5].yyxy 70: ADD TEMP[5], TEMP[5], IMM[1].wwww 71: MUL TEMP[5], TEMP[5], TEMP[5] 72: MUL TEMP[5], TEMP[5], TEMP[5] 73: DP4 TEMP[7].x, IMM[0].xxxx, TEMP[5] 74: RCP TEMP[7].x, TEMP[7].xxxx 75: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 76: RCP TEMP[7].x, CONST[0].xxxx 77: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 78: MUL TEMP[7], TEMP[5], TEMP[5] 79: MOV TEMP[8].xy, IN[0].xyyy 80: TEX TEMP[8], TEMP[8], SAMP[0], 2D 81: MOV TEMP[9].xy, IN[0].xyyy 82: TEX TEMP[9].xw, TEMP[9], SAMP[1], 2D 83: MOV TEMP[10].w, TEMP[9].wwww 84: MAD TEMP[11].yw, TEMP[8].xxzy, IMM[1].wwww, IMM[0].yyyy 85: MOV TEMP[1].yw, TEMP[11].wyww 86: MUL TEMP[11].yz, TEMP[5].xxxx, TEMP[1].xyww 87: MOV TEMP[2].yz, TEMP[11].zyzz 88: MAD TEMP[9].z, TEMP[9].xxxx, IMM[1].wwww, IMM[0].yyyy 89: MOV TEMP[10].z, TEMP[9].zzzz 90: MOV TEMP[10].xy, TEMP[8].zwzz 91: MOV TEMP[8].xy, IN[1].xyyy 92: TEX TEMP[8], TEMP[8], SAMP[0], 2D 93: MOV TEMP[9].zw, TEMP[8].wwzw 94: MOV TEMP[12].xy, IN[1].xyyy 95: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 96: MOV TEMP[13].w, TEMP[12].wwww 97: ADD TEMP[14].xy, TEMP[8], TEMP[8] 98: MOV TEMP[9].xy, TEMP[14].xyxx 99: MAD TEMP[14].xy, TEMP[9], IMM[2].xyzy, IMM[2].yxzy 100: MOV TEMP[9].xy, TEMP[14].xyxx 101: MUL TEMP[14].xy, TEMP[5].yyyy, TEMP[9] 102: MOV TEMP[9].xy, TEMP[14].xyxx 103: MAD TEMP[15].yw, TEMP[1], TEMP[5].xxxx, TEMP[9].xxzy 104: MOV TEMP[1].yw, TEMP[15].wyww 105: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 106: MOV TEMP[13].z, TEMP[12].zzzz 107: MOV TEMP[13].xy, TEMP[8].zwzz 108: MUL TEMP[8].xyz, TEMP[7].yyyy, TEMP[13] 109: MOV TEMP[13].xyz, TEMP[8].xyzx 110: MAD TEMP[8].xyz, TEMP[10], TEMP[7].xxxx, TEMP[13] 111: MOV TEMP[10].xyz, TEMP[8].xyzx 112: ADD TEMP[8].xy, TEMP[2].yzzw, TEMP[2].yzzw 113: MOV TEMP[5].xy, TEMP[8].xyxx 114: MUL TEMP[8].xy, TEMP[9], TEMP[5] 115: MOV TEMP[13].xy, TEMP[8].xyxx 116: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[14].yxxx 117: MOV TEMP[13].z, TEMP[8].xxxx 118: ADD TEMP[8].xyz, TEMP[10], TEMP[13] 119: MOV TEMP[10].xyz, TEMP[8].xyzx 120: MOV TEMP[8].xy, IN[2].xyyy 121: TEX TEMP[8], TEMP[8], SAMP[0], 2D 122: MOV TEMP[13].w, TEMP[8].wwww 123: MOV TEMP[12].xy, IN[2].xyyy 124: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 125: MOV TEMP[15].w, TEMP[12].wwww 126: MAD TEMP[16].xy, TEMP[8], IMM[2].wwww, IMM[0].xxxx 127: MOV TEMP[7].xy, TEMP[16].xyxx 128: MUL TEMP[16].zw, TEMP[5].zzzz, TEMP[16].xyxy 129: MOV TEMP[9].zw, TEMP[16].wwzw 130: MAD TEMP[17].yw, TEMP[7].xxzy, TEMP[5].zzzz, TEMP[1] 131: MOV TEMP[1].yw, TEMP[17].wyww 132: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].wwww, IMM[0].yyyy 133: MOV TEMP[15].z, TEMP[12].zzzz 134: MOV TEMP[15].xy, TEMP[8].zwzz 135: MAD TEMP[8].xyz, TEMP[15], TEMP[7].zzzz, TEMP[10] 136: MOV TEMP[7].xyz, TEMP[8].xyzx 137: MUL TEMP[8].xy, TEMP[5], TEMP[16].zwzw 138: MOV TEMP[10].xy, TEMP[8].xyxx 139: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[16].wzzz 140: MOV TEMP[10].z, TEMP[8].xxxx 141: ADD TEMP[8].xyz, TEMP[7], TEMP[10] 142: MOV TEMP[7].xyz, TEMP[8].xyzx 143: ADD TEMP[8].xy, TEMP[9], TEMP[9] 144: MOV TEMP[10].xy, TEMP[8].xyxx 145: MUL TEMP[8].xy, TEMP[16].zwzw, TEMP[10] 146: MOV TEMP[13].xy, TEMP[8].xyxx 147: DP2 TEMP[8].x, TEMP[14].xyyy, TEMP[16].wzzz 148: MOV TEMP[13].z, TEMP[8].xxxx 149: ADD TEMP[8].xyz, TEMP[7], TEMP[13] 150: MOV TEMP[7].xyz, TEMP[8].xyzx 151: MOV TEMP[8].xy, IN[3].xyyy 152: TEX TEMP[8], TEMP[8], SAMP[0], 2D 153: MOV TEMP[13].zw, TEMP[8].wwzw 154: MOV TEMP[12].xy, IN[3].xyyy 155: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 156: MOV TEMP[15].w, TEMP[12].wwww 157: ADD TEMP[17].zw, TEMP[8].xyyx, TEMP[8].xyyx 158: MOV TEMP[10].zw, TEMP[17].wwzw 159: MAD TEMP[17].zw, TEMP[10], IMM[0].ywyx, IMM[0].ywxy 160: MOV TEMP[10].zw, TEMP[17].wwzw 161: MUL TEMP[18].xy, TEMP[5].wwww, TEMP[17].zwzw 162: MOV TEMP[13].xy, TEMP[18].xyxx 163: MAD TEMP[17].xy, TEMP[17].wzzw, TEMP[5].wwww, TEMP[1].ywzw 164: MOV TEMP[19].xy, TEMP[17].xyxx 165: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 166: MOV TEMP[15].z, TEMP[12].zzzz 167: MOV TEMP[15].xy, TEMP[8].zwzz 168: MAD TEMP[7].xyz, TEMP[15], TEMP[7].wwww, TEMP[7] 169: MUL TEMP[8].xy, TEMP[5], TEMP[13].yxzw 170: MOV TEMP[5].xy, TEMP[8].xyxx 171: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[18].xyyy 172: MOV TEMP[5].z, TEMP[8].xxxx 173: ADD TEMP[7].yzw, TEMP[5].xxyz, TEMP[7].xxyz 174: MOV TEMP[2].yzw, TEMP[7].zyzw 175: MUL TEMP[7].xy, TEMP[10], TEMP[13].yxzw 176: MOV TEMP[5].xy, TEMP[7].xyxx 177: DP2 TEMP[7].x, TEMP[14].xyyy, TEMP[18].xyyy 178: MOV TEMP[5].z, TEMP[7].xxxx 179: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 180: MOV TEMP[2].yzw, TEMP[7].zyzw 181: MUL TEMP[7].yw, TEMP[9].xzzw, TEMP[13].xyzx 182: MOV TEMP[1].yw, TEMP[7].wyww 183: ADD TEMP[7].xy, TEMP[1].ywzw, TEMP[1].ywzw 184: MOV TEMP[5].xy, TEMP[7].xyxx 185: DP2 TEMP[7].x, TEMP[16].zwww, TEMP[18].xyyy 186: MOV TEMP[5].z, TEMP[7].xxxx 187: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 188: MOV TEMP[2].yzw, TEMP[7].zyzw 189: MAD TEMP[7].yzw, TEMP[17].xxyx, -TEMP[17].xxyy, TEMP[2] 190: MOV TEMP[2].yz, TEMP[7].zyzw 191: MOV TEMP[19].z, IMM[0].xxxx 192: DP3 TEMP[8].x, CONST[1].xyzz, TEMP[19].xyzz 193: MOV_SAT TEMP[8].x, TEMP[8].xxxx 194: ADD TEMP[11].xyz, CONST[8], -IN[4] 195: MOV TEMP[5].xyz, TEMP[11].xyzx 196: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 197: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 198: RSQ TEMP[11].x, TEMP[11].xxxx 199: MUL TEMP[12].xyz, TEMP[11].xxxx, TEMP[5] 200: MAD TEMP[14].xyz, TEMP[5], TEMP[11].xxxx, -CONST[5] 201: MOV TEMP[9].xyz, TEMP[14].xyzx 202: MAD TEMP[14].xyz, CONST[5].wwww, TEMP[9], CONST[5] 203: MOV TEMP[9].xyz, TEMP[14].xyzx 204: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[14].xyzz 205: MAX TEMP[14].x, TEMP[14].xxxx, IMM[3].xxxx 206: RSQ TEMP[14].x, TEMP[14].xxxx 207: MAD TEMP[14].xyz, TEMP[9], TEMP[14].xxxx, CONST[1] 208: MOV TEMP[9].xyz, TEMP[14].xyzx 209: RCP TEMP[14].x, TEMP[14].zzzz 210: MAD TEMP[14].xy, TEMP[9], TEMP[14].xxxx, -TEMP[19] 211: RCP TEMP[15].x, CONST[3].wwww 212: ADD TEMP[16].zw, TEMP[2].xyyz, TEMP[15].xxxx 213: MUL TEMP[17].w, TEMP[7].wwww, TEMP[7].wwww 214: MAD TEMP[18].x, TEMP[16].zzzz, TEMP[16].wwww, -TEMP[17].wwww 215: MUL TEMP[20].y, TEMP[14].xxxx, TEMP[14].xxxx 216: ADD TEMP[7].w, TEMP[7].wwww, TEMP[7].wwww 217: MOV TEMP[2].w, TEMP[7].wwww 218: MUL TEMP[21].x, TEMP[14].xxxx, TEMP[7].wwww 219: MAD TEMP[22].z, TEMP[16].zzzz, TEMP[14].yyyy, -TEMP[21].xxxx 220: MUL TEMP[22].z, TEMP[14].yyyy, TEMP[22].zzzz 221: MAD TEMP[16].z, TEMP[20].yyyy, TEMP[16].wwww, TEMP[22].zzzz 222: MUL TEMP[16].z, TEMP[16].zzzz, IMM[3].yyyy 223: RCP TEMP[22].x, TEMP[18].xxxx 224: MUL TEMP[23].z, TEMP[22].xxxx, TEMP[16].zzzz 225: MOV TEMP[24].x, -TEMP[18].xxxx 226: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 227: UIF TEMP[24].xxxx :0 228: MOV TEMP[24].x, IMM[0].xxxx 229: ELSE :0 230: MOV TEMP[24].x, IMM[0].zzzz 231: ENDIF 232: MOV TEMP[10].w, TEMP[24].xxxx 233: MAD TEMP[16].z, TEMP[16].zzzz, TEMP[22].xxxx, IMM[3].zzzz 234: FSGE TEMP[16].x, TEMP[16].zzzz, IMM[0].zzzz 235: UIF TEMP[16].xxxx :0 236: MOV TEMP[16].x, IMM[0].xxxx 237: ELSE :0 238: MOV TEMP[16].x, IMM[0].zzzz 239: ENDIF 240: MOV TEMP[9].z, TEMP[16].xxxx 241: MUL TEMP[16].w, TEMP[23].zzzz, IMM[3].wwww 242: EX2 TEMP[16].x, TEMP[16].wwww 243: MAX TEMP[18].x, TEMP[18].xxxx, IMM[3].xxxx 244: RSQ TEMP[18].x, TEMP[18].xxxx 245: MUL TEMP[16].w, TEMP[16].xxxx, TEMP[18].xxxx 246: MOV TEMP[9].w, TEMP[16].wwww 247: MAD TEMP[15].xz, TEMP[15].xxxx, IMM[4].xxxx, TEMP[2].yyzw 248: MAD TEMP[18].w, TEMP[15].xxxx, TEMP[15].zzzz, -TEMP[17].wwww 249: MAD TEMP[21].x, TEMP[15].xxxx, TEMP[14].yyyy, -TEMP[21].xxxx 250: MUL TEMP[14].x, TEMP[14].yyyy, TEMP[21].xxxx 251: MAD TEMP[14].x, TEMP[20].yyyy, TEMP[15].zzzz, TEMP[14].xxxx 252: MUL TEMP[14].x, TEMP[14].xxxx, IMM[3].yyyy 253: RCP TEMP[15].x, TEMP[18].wwww 254: MOV TEMP[9].y, TEMP[15].xxxx 255: MUL TEMP[20].x, TEMP[15].xxxx, TEMP[14].xxxx 256: MOV TEMP[21].x, -TEMP[18].wwww 257: FSGE TEMP[21].x, TEMP[21].xxxx, IMM[0].zzzz 258: UIF TEMP[21].xxxx :0 259: MOV TEMP[21].x, IMM[0].xxxx 260: ELSE :0 261: MOV TEMP[21].x, IMM[0].zzzz 262: ENDIF 263: MOV TEMP[10].y, TEMP[21].xxxx 264: MAD TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx, IMM[3].zzzz 265: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 266: UIF TEMP[14].xxxx :0 267: MOV TEMP[14].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[14].x, IMM[0].zzzz 270: ENDIF 271: MOV TEMP[9].x, TEMP[14].xxxx 272: ADD TEMP[9].xz, TEMP[9], TEMP[10].yyww 273: MUL TEMP[14].y, TEMP[20].xxxx, IMM[3].wwww 274: EX2 TEMP[14].x, TEMP[14].yyyy 275: MAX TEMP[15].x, TEMP[18].wwww, IMM[3].xxxx 276: RSQ TEMP[15].x, TEMP[15].xxxx 277: MUL TEMP[14].w, TEMP[15].xxxx, TEMP[14].xxxx 278: MOV TEMP[5].w, TEMP[14].wwww 279: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, -CONST[6] 280: MOV TEMP[5].xyz, TEMP[11].xyzx 281: MAD TEMP[11].xyz, CONST[6].wwww, TEMP[5], CONST[6] 282: MOV TEMP[5].xyz, TEMP[11].xyzx 283: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 284: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 285: RSQ TEMP[11].x, TEMP[11].xxxx 286: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, CONST[2] 287: MOV TEMP[5].xyz, TEMP[11].xyzx 288: RCP TEMP[11].x, TEMP[11].zzzz 289: MAD TEMP[11].xy, TEMP[5], TEMP[11].xxxx, -TEMP[19] 290: RCP TEMP[14].x, CONST[4].wwww 291: ADD TEMP[14].xy, TEMP[14].xxxx, TEMP[2].yzzw 292: MAD TEMP[15].w, TEMP[14].xxxx, TEMP[14].yyyy, -TEMP[17].wwww 293: MUL TEMP[17].z, TEMP[11].xxxx, TEMP[11].xxxx 294: MUL TEMP[7].w, TEMP[7].wwww, TEMP[11].xxxx 295: MAD TEMP[7].w, TEMP[14].xxxx, TEMP[11].yyyy, -TEMP[7].wwww 296: MUL TEMP[7].w, TEMP[11].yyyy, TEMP[7].wwww 297: MAD TEMP[7].w, TEMP[17].zzzz, TEMP[14].yyyy, TEMP[7].wwww 298: MUL TEMP[7].w, TEMP[7].wwww, IMM[3].yyyy 299: RCP TEMP[5].x, TEMP[15].wwww 300: MUL TEMP[11].y, TEMP[7].wwww, TEMP[5].xxxx 301: MOV TEMP[5].y, TEMP[11].yyyy 302: MOV TEMP[14].x, -TEMP[15].wwww 303: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 304: UIF TEMP[14].xxxx :0 305: MOV TEMP[14].x, IMM[0].xxxx 306: ELSE :0 307: MOV TEMP[14].x, IMM[0].zzzz 308: ENDIF 309: MOV TEMP[5].z, TEMP[14].xxxx 310: MAD TEMP[7].w, TEMP[7].wwww, TEMP[5].xxxx, IMM[3].zzzz 311: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 312: UIF TEMP[7].xxxx :0 313: MOV TEMP[7].x, IMM[0].xxxx 314: ELSE :0 315: MOV TEMP[7].x, IMM[0].zzzz 316: ENDIF 317: ADD TEMP[7].w, TEMP[7].xxxx, TEMP[14].xxxx 318: MOV TEMP[2].w, TEMP[7].wwww 319: MUL TEMP[14].xw, TEMP[5].yyzw, IMM[4].yyxz 320: MAX TEMP[17].x, TEMP[15].wwww, IMM[3].xxxx 321: RSQ TEMP[17].x, TEMP[17].xxxx 322: EX2 TEMP[18].x, TEMP[14].xxxx 323: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[18].xxxx 324: MUL TEMP[18].xyz, CONST[10].xyww, IN[4].yyyy 325: MOV TEMP[10].xyz, TEMP[18].xyzx 326: MAD TEMP[18].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[10] 327: MOV TEMP[10].xyz, TEMP[18].xyzx 328: MAD TEMP[18].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[10] 329: MOV TEMP[10].xyz, TEMP[18].xyzx 330: ADD TEMP[18].xyz, TEMP[10], CONST[12].xyww 331: MOV TEMP[10].xyz, TEMP[18].xyzx 332: RCP TEMP[18].x, TEMP[18].zzzz 333: MUL TEMP[18].xy, TEMP[18].xxxx, TEMP[10] 334: MOV TEMP[10].xy, TEMP[18].xyxx 335: MAD TEMP[18].xy, TEMP[10], IMM[5], IMM[4].wwww 336: MOV TEMP[10].xy, TEMP[18].xyxx 337: MAD TEMP[18].yz, TEMP[2], IMM[5].zzzz, TEMP[10].xxyw 338: MOV TEMP[2].yz, TEMP[18].zyzz 339: MAD TEMP[18].xy, TEMP[15].wwww, IMM[1].wwww, TEMP[2].yzzw 340: MAD TEMP[20].xy, TEMP[15].wwww, IMM[6], TEMP[2].yzzw 341: MOV TEMP[20].xy, TEMP[20].xyyy 342: TEX TEMP[20], TEMP[20], SAMP[2], 2D 343: MUL TEMP[13], TEMP[20], IMM[5].wwww 344: MOV TEMP[18].xy, TEMP[18].xyyy 345: TEX TEMP[18], TEMP[18], SAMP[2], 2D 346: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[13] 347: MAD TEMP[18].xy, TEMP[15].wwww, IMM[6].yzzw, TEMP[2].yzzw 348: MOV TEMP[18].xy, TEMP[18].xyyy 349: TEX TEMP[18], TEMP[18], SAMP[2], 2D 350: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[10] 351: MAD TEMP[15].yz, TEMP[15].wwww, IMM[2].wwww, TEMP[2] 352: MOV TEMP[15].xy, TEMP[15].yzzz 353: TEX TEMP[15], TEMP[15], SAMP[2], 2D 354: MOV TEMP[13].w, TEMP[15].wwww 355: MAD TEMP[10], TEMP[15], IMM[5].wwww, TEMP[10] 356: ABS TEMP[15].x, TEMP[10].xxxx 357: LG2 TEMP[13].x, TEMP[15].xxxx 358: ABS TEMP[15].x, TEMP[10].yyyy 359: LG2 TEMP[15].x, TEMP[15].xxxx 360: MOV TEMP[13].y, TEMP[15].xxxx 361: ABS TEMP[15].x, TEMP[10].zzzz 362: LG2 TEMP[15].x, TEMP[15].xxxx 363: MOV TEMP[13].z, TEMP[15].xxxx 364: MUL TEMP[15].xyz, TEMP[13], IMM[1].yyyy 365: EX2 TEMP[13].x, TEMP[15].xxxx 366: EX2 TEMP[18].x, TEMP[15].yyyy 367: MOV TEMP[13].y, TEMP[18].xxxx 368: EX2 TEMP[15].x, TEMP[15].zzzz 369: MOV TEMP[13].z, TEMP[15].xxxx 370: DP3 TEMP[12].x, TEMP[19].xyzz, TEMP[12].xyzz 371: ADD TEMP[12].w, -TEMP[12].xxxx, IMM[0].xxxx 372: MUL TEMP[15].y, TEMP[12].wwww, TEMP[12].wwww 373: MUL TEMP[15].y, TEMP[15].yyyy, TEMP[15].yyyy 374: MUL TEMP[12].w, TEMP[12].wwww, TEMP[15].yyyy 375: MUL TEMP[15].xyz, TEMP[16].wwww, CONST[3] 376: MOV TEMP[16], -TEMP[9].zzzz 377: FSGE TEMP[18].x, TEMP[16].xxxx, IMM[0].zzzz 378: UIF TEMP[18].xxxx :0 379: MOV TEMP[18].x, TEMP[15].xxxx 380: ELSE :0 381: MOV TEMP[18].x, IMM[0].zzzz 382: ENDIF 383: MOV TEMP[18].x, TEMP[18].xxxx 384: FSGE TEMP[19].x, TEMP[16].yyyy, IMM[0].zzzz 385: UIF TEMP[19].xxxx :0 386: MOV TEMP[19].x, TEMP[15].yyyy 387: ELSE :0 388: MOV TEMP[19].x, IMM[0].zzzz 389: ENDIF 390: MOV TEMP[18].y, TEMP[19].xxxx 391: FSGE TEMP[19].x, TEMP[16].zzzz, IMM[0].zzzz 392: UIF TEMP[19].xxxx :0 393: MOV TEMP[15].x, TEMP[15].zzzz 394: ELSE :0 395: MOV TEMP[15].x, IMM[0].zzzz 396: ENDIF 397: MOV TEMP[18].z, TEMP[15].xxxx 398: FSGE TEMP[15].x, TEMP[16].wwww, IMM[0].zzzz 399: UIF TEMP[15].xxxx :0 400: ELSE :0 401: ENDIF 402: MUL TEMP[15].yzw, TEMP[17].xxxx, CONST[4].xxyz 403: MOV TEMP[7], -TEMP[7].wwww 404: FSGE TEMP[16].x, TEMP[7].xxxx, IMM[0].zzzz 405: UIF TEMP[16].xxxx :0 406: ELSE :0 407: ENDIF 408: FSGE TEMP[17].x, TEMP[7].yyyy, IMM[0].zzzz 409: UIF TEMP[17].xxxx :0 410: MOV TEMP[17].x, TEMP[15].yyyy 411: ELSE :0 412: MOV TEMP[17].x, IMM[0].zzzz 413: ENDIF 414: MOV TEMP[16].y, TEMP[17].xxxx 415: FSGE TEMP[17].x, TEMP[7].zzzz, IMM[0].zzzz 416: UIF TEMP[17].xxxx :0 417: MOV TEMP[17].x, TEMP[15].zzzz 418: ELSE :0 419: MOV TEMP[17].x, IMM[0].zzzz 420: ENDIF 421: MOV TEMP[16].z, TEMP[17].xxxx 422: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 423: UIF TEMP[7].xxxx :0 424: MOV TEMP[7].x, TEMP[15].wwww 425: ELSE :0 426: MOV TEMP[7].x, IMM[0].zzzz 427: ENDIF 428: MOV TEMP[16].w, TEMP[7].xxxx 429: MOV TEMP[2].yzw, TEMP[16].zyzw 430: ADD TEMP[7].yzw, TEMP[2], TEMP[18].xxyz 431: MOV TEMP[2].yzw, TEMP[7].zyzw 432: MAD TEMP[7].yzw, TEMP[2], TEMP[12].wwww, TEMP[6].xxyz 433: MOV TEMP[2].yzw, TEMP[7].zyzw 434: MUL TEMP[7].xyz, TEMP[6], CONST[3] 435: MUL TEMP[7].xzw, TEMP[14].wwww, TEMP[7].xyyz 436: MOV TEMP[9], -TEMP[9].xxxx 437: FSGE TEMP[12].x, TEMP[9].xxxx, IMM[0].zzzz 438: UIF TEMP[12].xxxx :0 439: MOV TEMP[12].x, TEMP[7].xxxx 440: ELSE :0 441: MOV TEMP[12].x, IMM[0].zzzz 442: ENDIF 443: MOV TEMP[12].x, TEMP[12].xxxx 444: FSGE TEMP[14].x, TEMP[9].yyyy, IMM[0].zzzz 445: UIF TEMP[14].xxxx :0 446: ELSE :0 447: ENDIF 448: FSGE TEMP[14].x, TEMP[9].zzzz, IMM[0].zzzz 449: UIF TEMP[14].xxxx :0 450: MOV TEMP[14].x, TEMP[7].zzzz 451: ELSE :0 452: MOV TEMP[14].x, IMM[0].zzzz 453: ENDIF 454: MOV TEMP[12].z, TEMP[14].xxxx 455: FSGE TEMP[9].x, TEMP[9].wwww, IMM[0].zzzz 456: UIF TEMP[9].xxxx :0 457: MOV TEMP[7].x, TEMP[7].wwww 458: ELSE :0 459: MOV TEMP[7].x, IMM[0].zzzz 460: ENDIF 461: MOV TEMP[12].w, TEMP[7].xxxx 462: MAD TEMP[7].yzw, TEMP[2], TEMP[8].xxxx, TEMP[12].xxzw 463: MOV TEMP[2].yzw, TEMP[7].zyzw 464: MAD TEMP[7].yzw, TEMP[13].xxyz, CONST[23].xxxx, TEMP[2] 465: MOV TEMP[2].w, TEMP[7].zyzw 466: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[11].yyyy 467: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[8].yyyy 468: MUL TEMP[3].y, TEMP[3].xxxx, TEMP[8].yyyy 469: MAD TEMP[3].xyz, TEMP[3].yyyy, TEMP[6], TEMP[7].yzww 470: MOV TEMP[2].xyz, TEMP[3].xyzx 471: MUL TEMP[3].xyz, CONST[14].xyww, IN[4].yyyy 472: MOV TEMP[4].xyz, TEMP[3].xyzx 473: MAD TEMP[3].xyz, IN[4].xxxx, CONST[13].xyww, TEMP[4] 474: MOV TEMP[4].xyz, TEMP[3].xyzx 475: MAD TEMP[3].xyz, IN[4].zzzz, CONST[15].xyww, TEMP[4] 476: MOV TEMP[4].xyz, TEMP[3].xyzx 477: ADD TEMP[3].xyz, TEMP[4], CONST[16].xyww 478: RCP TEMP[7].x, TEMP[3].zzzz 479: MUL TEMP[3].yw, TEMP[7].xxxx, TEMP[3].xxzy 480: MOV TEMP[1].yw, TEMP[3].wyww 481: MAD TEMP[3].yw, TEMP[1], IMM[5].xxzy, IMM[4].wwww 482: MOV TEMP[1].yw, TEMP[3].wyww 483: MAD TEMP[3].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 484: MOV TEMP[3].xy, TEMP[3].xyyy 485: TEX TEMP[3].xw, TEMP[3], SAMP[10], 2D 486: MOV TEMP[5].w, TEMP[3].wwww 487: MOV_SAT TEMP[3].x, TEMP[3].xxxx 488: MUL TEMP[3].y, TEMP[3].xxxx, CONST[21].xxxx 489: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 490: MOV TEMP[4].xyz, TEMP[3].xyzx 491: MAD TEMP[3].xyz, TEMP[4], IMM[7].xxxx, TEMP[2] 492: MAD TEMP[6], TEMP[10].wwww, TEMP[6].wwww, TEMP[6].wwww 493: MOV_SAT TEMP[6], TEMP[6] 494: MOV TEMP[6].w, TEMP[6].wwww 495: ABS TEMP[7].x, TEMP[3].xxxx 496: LG2 TEMP[4].x, TEMP[7].xxxx 497: ABS TEMP[7].x, TEMP[3].yyyy 498: LG2 TEMP[7].x, TEMP[7].xxxx 499: MOV TEMP[4].y, TEMP[7].xxxx 500: ABS TEMP[3].x, TEMP[3].zzzz 501: LG2 TEMP[3].x, TEMP[3].xxxx 502: MOV TEMP[4].z, TEMP[3].xxxx 503: MUL TEMP[3].xyz, TEMP[4], IMM[7].yyyy 504: EX2 TEMP[4].x, TEMP[3].xxxx 505: EX2 TEMP[7].x, TEMP[3].yyyy 506: MOV TEMP[4].y, TEMP[7].xxxx 507: EX2 TEMP[3].x, TEMP[3].zzzz 508: MOV TEMP[4].z, TEMP[3].xxxx 509: MAD TEMP[3].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 510: MOV TEMP[3].xy, TEMP[3].xyyy 511: TEX TEMP[3], TEMP[3], SAMP[7], 2D 512: MOV TEMP[2].w, TEMP[3].wwww 513: ADD TEMP[7].y, -TEMP[3].wwww, IMM[0].xxxx 514: MAD TEMP[3].xyz, TEMP[4], TEMP[7].yyyy, TEMP[3] 515: MOV TEMP[2].xyz, TEMP[3].xyzx 516: ADD TEMP[3].xyz, TEMP[2], IMM[7].zzzz 517: MOV TEMP[2].xyz, TEMP[3].xyzx 518: MAD TEMP[3].xyz, TEMP[0].xxxx, TEMP[2], IMM[7].wwww 519: MOV TEMP[2].xyz, TEMP[3].xyzx 520: MOV TEMP[3].xyz, TEMP[3].xyzz 521: TEX TEMP[3], TEMP[3], SAMP[9], 3D 522: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 523: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 524: MOV TEMP[5].xyz, TEMP[0].xyzx 525: RCP TEMP[1].x, IN[5].zzzz 526: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 527: MAD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy, IMM[3].yyyy 528: MOV TEMP[1].x, TEMP[2].xxxx 529: MOV TEMP[1].y, CONST[7].wwww 530: MOV TEMP[2].xy, TEMP[1].xyyy 531: TEX TEMP[2].x, TEMP[2], SAMP[6], 2D 532: MOV TEMP[1].x, TEMP[2].xxxx 533: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 534: MOV TEMP[1].w, TEMP[3].zyzw 535: DP3 TEMP[4].x, TEMP[3].yzww, TEMP[3].yzww 536: MOV TEMP[1].y, TEMP[4].xxxx 537: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 538: MUL TEMP[4].z, TEMP[1].zzzz, IMM[8].xxxx 539: EX2 TEMP[4].x, TEMP[4].zzzz 540: ADD TEMP[4].z, -TEMP[4].xxxx, IMM[0].xxxx 541: MUL TEMP[1].y, TEMP[4].zzzz, TEMP[1].yyyy 542: RCP TEMP[3].x, TEMP[3].wwww 543: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 544: MUL TEMP[1].y, TEMP[1].yyyy, IMM[8].xxxx 545: EX2 TEMP[1].x, TEMP[1].yyyy 546: MOV_SAT TEMP[1].x, TEMP[1].xxxx 547: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 548: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 549: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[22].xxyz 550: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[5] 551: MOV TEMP[6].xyz, TEMP[0].xyzx 552: MOV OUT[0], TEMP[6] 553: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 252) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %90 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %91 = load <8 x i32> addrspace(2)* %90, !tbaa !0 %92 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %93 = load <4 x i32> addrspace(2)* %92, !tbaa !0 %94 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %95 = load <8 x i32> addrspace(2)* %94, !tbaa !0 %96 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %97 = load <4 x i32> addrspace(2)* %96, !tbaa !0 %98 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %99 = load <8 x i32> addrspace(2)* %98, !tbaa !0 %100 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %101 = load <4 x i32> addrspace(2)* %100, !tbaa !0 %102 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %103 = load <8 x i32> addrspace(2)* %102, !tbaa !0 %104 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %105 = load <4 x i32> addrspace(2)* %104, !tbaa !0 %106 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %107 = load <8 x i32> addrspace(2)* %106, !tbaa !0 %108 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %109 = load <4 x i32> addrspace(2)* %108, !tbaa !0 %110 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %111 = load <8 x i32> addrspace(2)* %110, !tbaa !0 %112 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %113 = load <4 x i32> addrspace(2)* %112, !tbaa !0 %114 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %115 = load <8 x i32> addrspace(2)* %114, !tbaa !0 %116 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %117 = load <4 x i32> addrspace(2)* %116, !tbaa !0 %118 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %119 = load <8 x i32> addrspace(2)* %118, !tbaa !0 %120 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %121 = load <4 x i32> addrspace(2)* %120, !tbaa !0 %122 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %123 = load <8 x i32> addrspace(2)* %122, !tbaa !0 %124 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %125 = load <4 x i32> addrspace(2)* %124, !tbaa !0 %126 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %127 = load <8 x i32> addrspace(2)* %126, !tbaa !0 %128 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %129 = load <4 x i32> addrspace(2)* %128, !tbaa !0 %130 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %131 = load <8 x i32> addrspace(2)* %130, !tbaa !0 %132 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %133 = load <4 x i32> addrspace(2)* %132, !tbaa !0 %134 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %143 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %144 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %145 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %146 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %147 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %148 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %149 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %150 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %151 = fmul float %147, 1.000000e+00 %152 = fadd float %151, 0.000000e+00 %153 = fmul float %148, -1.000000e+00 %154 = fadd float %153, 1.000000e+00 %155 = bitcast float %152 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %123 to <32 x i8> %160 = bitcast <4 x i32> %125 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 2 %164 = call float @fabs(float %162) %165 = fsub float -0.000000e+00, %164 %166 = fsub float -0.000000e+00, %164 %167 = fsub float -0.000000e+00, %164 %168 = fsub float -0.000000e+00, %164 %169 = fcmp oge float %165, 0.000000e+00 %170 = sext i1 %169 to i32 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = icmp ne i32 %172, 0 %. = select i1 %173, float -1.000000e+00, float -0.000000e+00 %174 = fcmp oge float %166, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = icmp ne i32 %177, 0 %temp16.0 = select i1 %178, float -1.000000e+00, float -0.000000e+00 %179 = fcmp oge float %167, 0.000000e+00 %180 = sext i1 %179 to i32 %181 = bitcast i32 %180 to float %182 = bitcast float %181 to i32 %183 = icmp ne i32 %182, 0 %.166 = select i1 %183, float -1.000000e+00, float -0.000000e+00 %184 = fcmp oge float %168, 0.000000e+00 %185 = sext i1 %184 to i32 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = icmp ne i32 %187, 0 %189 = fcmp olt float %., 0.000000e+00 %190 = sext i1 %189 to i32 %191 = fcmp olt float %temp16.0, 0.000000e+00 %192 = sext i1 %191 to i32 %193 = fcmp olt float %.166, 0.000000e+00 %194 = sext i1 %193 to i32 %195 = bitcast i32 %190 to float %196 = bitcast i32 %192 to float %197 = bitcast i32 %194 to float %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = or i32 %198, %199 %201 = bitcast i32 %200 to float %202 = bitcast float %201 to i32 %203 = bitcast float %196 to i32 %204 = or i32 %202, %203 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = icmp ne i32 %206, 0 br i1 %207, label %IF110, label %ENDIF109 IF110: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF109 ENDIF109: ; preds = %main_body, %IF110 %208 = fmul float %79, %143 %209 = fmul float %80, %143 %210 = fmul float %142, %77 %211 = fadd float %210, %208 %212 = fmul float %142, %78 %213 = fadd float %212, %209 %214 = fmul float %144, %81 %215 = fadd float %214, %211 %216 = fmul float %144, %82 %217 = fadd float %216, %213 %218 = fadd float %215, %83 %219 = fadd float %217, %84 %220 = bitcast float %149 to i32 %221 = bitcast float %150 to i32 %222 = insertelement <2 x i32> undef, i32 %220, i32 0 %223 = insertelement <2 x i32> %222, i32 %221, i32 1 %224 = bitcast <8 x i32> %111 to <32 x i8> %225 = bitcast <4 x i32> %113 to <16 x i8> %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %224, <16 x i8> %225, i32 2) %227 = extractelement <4 x float> %226, i32 3 %228 = fmul float %227, 4.000000e+00 %229 = fmul float %227, 4.000000e+00 %230 = fmul float %227, 4.000000e+00 %231 = fmul float %227, 4.000000e+00 %232 = call float @llvm.AMDIL.clamp.(float %228, float 0.000000e+00, float 1.000000e+00) %233 = call float @llvm.AMDIL.clamp.(float %229, float 0.000000e+00, float 1.000000e+00) %234 = call float @llvm.AMDIL.clamp.(float %230, float 0.000000e+00, float 1.000000e+00) %235 = call float @llvm.AMDIL.clamp.(float %231, float 0.000000e+00, float 1.000000e+00) %236 = fsub float -0.000000e+00, %232 %237 = fadd float %236, 1.000000e+00 %238 = bitcast float %237 to i32 %239 = bitcast float 0.000000e+00 to i32 %240 = bitcast float 0.000000e+00 to i32 %241 = insertelement <4 x i32> undef, i32 %238, i32 0 %242 = insertelement <4 x i32> %241, i32 %239, i32 1 %243 = insertelement <4 x i32> %242, i32 %240, i32 2 %244 = insertelement <4 x i32> %243, i32 undef, i32 3 %245 = bitcast <8 x i32> %107 to <32 x i8> %246 = bitcast <4 x i32> %109 to <16 x i8> %247 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %244, <32 x i8> %245, <16 x i8> %246, i32 2) %248 = extractelement <4 x float> %247, i32 0 %249 = extractelement <4 x float> %247, i32 1 %250 = extractelement <4 x float> %247, i32 2 %251 = extractelement <4 x float> %247, i32 3 %252 = call float @llvm.pow.f32(float %248, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %254 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %255 = call float @llvm.pow.f32(float %251, float 1.000000e+00) %256 = fmul float %218, 1.000000e+00 %257 = fadd float %256, 0.000000e+00 %258 = fmul float %219, -1.000000e+00 %259 = fadd float %258, 1.000000e+00 %260 = bitcast float %257 to i32 %261 = bitcast float %259 to i32 %262 = insertelement <2 x i32> undef, i32 %260, i32 0 %263 = insertelement <2 x i32> %262, i32 %261, i32 1 %264 = bitcast <8 x i32> %103 to <32 x i8> %265 = bitcast <4 x i32> %105 to <16 x i8> %266 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %263, <32 x i8> %264, <16 x i8> %265, i32 2) %267 = extractelement <4 x float> %266, i32 0 %268 = extractelement <4 x float> %266, i32 1 %269 = fmul float %267, 2.000000e+00 %270 = fadd float %269, -1.000000e+00 %271 = fmul float %268, 2.000000e+00 %272 = fadd float %271, -1.000000e+00 %273 = fmul float 1.000000e+00, %270 %274 = fmul float -1.000000e+00, %272 %275 = fadd float %273, %274 %276 = fmul float -1.000000e+00, %270 %277 = fmul float -1.000000e+00, %272 %278 = fadd float %276, %277 %279 = fsub float -0.000000e+00, %275 %280 = fsub float -0.000000e+00, %278 %281 = fadd float %275, 2.000000e+00 %282 = fadd float %278, 2.000000e+00 %283 = fadd float %279, 2.000000e+00 %284 = fadd float %280, 2.000000e+00 %285 = fmul float %281, %281 %286 = fmul float %282, %282 %287 = fmul float %283, %283 %288 = fmul float %284, %284 %289 = fmul float %285, %285 %290 = fmul float %286, %286 %291 = fmul float %287, %287 %292 = fmul float %288, %288 %293 = fmul float 1.000000e+00, %289 %294 = fmul float 1.000000e+00, %290 %295 = fadd float %293, %294 %296 = fmul float 1.000000e+00, %291 %297 = fadd float %295, %296 %298 = fmul float 1.000000e+00, %292 %299 = fadd float %297, %298 %300 = fdiv float 1.000000e+00, %299 %301 = fmul float %300, %289 %302 = fmul float %300, %290 %303 = fmul float %300, %291 %304 = fmul float %300, %292 %305 = fdiv float 1.000000e+00, %24 %306 = fmul float %305, %301 %307 = fmul float %305, %302 %308 = fmul float %305, %303 %309 = fmul float %305, %304 %310 = fmul float %306, %306 %311 = fmul float %307, %307 %312 = fmul float %308, %308 %313 = fmul float %309, %309 %314 = bitcast float %134 to i32 %315 = bitcast float %135 to i32 %316 = insertelement <2 x i32> undef, i32 %314, i32 0 %317 = insertelement <2 x i32> %316, i32 %315, i32 1 %318 = bitcast <8 x i32> %91 to <32 x i8> %319 = bitcast <4 x i32> %93 to <16 x i8> %320 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = extractelement <4 x float> %320, i32 3 %325 = bitcast float %134 to i32 %326 = bitcast float %135 to i32 %327 = insertelement <2 x i32> undef, i32 %325, i32 0 %328 = insertelement <2 x i32> %327, i32 %326, i32 1 %329 = bitcast <8 x i32> %95 to <32 x i8> %330 = bitcast <4 x i32> %97 to <16 x i8> %331 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %328, <32 x i8> %329, <16 x i8> %330, i32 2) %332 = extractelement <4 x float> %331, i32 0 %333 = fmul float %321, 2.000000e+00 %334 = fadd float %333, -1.000000e+00 %335 = fmul float %322, 2.000000e+00 %336 = fadd float %335, -1.000000e+00 %337 = fmul float %306, %334 %338 = fmul float %306, %336 %339 = fmul float %332, 2.000000e+00 %340 = fadd float %339, -1.000000e+00 %341 = bitcast float %136 to i32 %342 = bitcast float %137 to i32 %343 = insertelement <2 x i32> undef, i32 %341, i32 0 %344 = insertelement <2 x i32> %343, i32 %342, i32 1 %345 = bitcast <8 x i32> %91 to <32 x i8> %346 = bitcast <4 x i32> %93 to <16 x i8> %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %344, <32 x i8> %345, <16 x i8> %346, i32 2) %348 = extractelement <4 x float> %347, i32 0 %349 = extractelement <4 x float> %347, i32 1 %350 = extractelement <4 x float> %347, i32 2 %351 = extractelement <4 x float> %347, i32 3 %352 = bitcast float %136 to i32 %353 = bitcast float %137 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %95 to <32 x i8> %357 = bitcast <4 x i32> %97 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = fadd float %348, %348 %361 = fadd float %349, %349 %362 = fmul float %360, -1.000000e+00 %363 = fadd float %362, 1.000000e+00 %364 = fmul float %361, 1.000000e+00 %365 = fadd float %364, -1.000000e+00 %366 = fmul float %307, %363 %367 = fmul float %307, %365 %368 = fmul float %334, %306 %369 = fadd float %368, %366 %370 = fmul float %336, %306 %371 = fadd float %370, %367 %372 = fmul float %359, -2.000000e+00 %373 = fadd float %372, 1.000000e+00 %374 = fmul float %311, %350 %375 = fmul float %311, %351 %376 = fmul float %311, %373 %377 = fmul float %323, %310 %378 = fadd float %377, %374 %379 = fmul float %324, %310 %380 = fadd float %379, %375 %381 = fmul float %340, %310 %382 = fadd float %381, %376 %383 = fadd float %337, %337 %384 = fadd float %338, %338 %385 = fmul float %366, %383 %386 = fmul float %367, %384 %387 = fmul float %337, %367 %388 = fmul float %338, %366 %389 = fadd float %387, %388 %390 = fadd float %378, %385 %391 = fadd float %380, %386 %392 = fadd float %382, %389 %393 = bitcast float %138 to i32 %394 = bitcast float %139 to i32 %395 = insertelement <2 x i32> undef, i32 %393, i32 0 %396 = insertelement <2 x i32> %395, i32 %394, i32 1 %397 = bitcast <8 x i32> %91 to <32 x i8> %398 = bitcast <4 x i32> %93 to <16 x i8> %399 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = extractelement <4 x float> %399, i32 1 %402 = extractelement <4 x float> %399, i32 2 %403 = extractelement <4 x float> %399, i32 3 %404 = bitcast float %138 to i32 %405 = bitcast float %139 to i32 %406 = insertelement <2 x i32> undef, i32 %404, i32 0 %407 = insertelement <2 x i32> %406, i32 %405, i32 1 %408 = bitcast <8 x i32> %95 to <32 x i8> %409 = bitcast <4 x i32> %97 to <16 x i8> %410 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %407, <32 x i8> %408, <16 x i8> %409, i32 2) %411 = extractelement <4 x float> %410, i32 0 %412 = fmul float %400, -2.000000e+00 %413 = fadd float %412, 1.000000e+00 %414 = fmul float %401, -2.000000e+00 %415 = fadd float %414, 1.000000e+00 %416 = fmul float %308, %413 %417 = fmul float %308, %415 %418 = fmul float %413, %308 %419 = fadd float %418, %369 %420 = fmul float %415, %308 %421 = fadd float %420, %371 %422 = fmul float %411, 2.000000e+00 %423 = fadd float %422, -1.000000e+00 %424 = fmul float %402, %312 %425 = fadd float %424, %390 %426 = fmul float %403, %312 %427 = fadd float %426, %391 %428 = fmul float %423, %312 %429 = fadd float %428, %392 %430 = fmul float %383, %416 %431 = fmul float %384, %417 %432 = fmul float %337, %417 %433 = fmul float %338, %416 %434 = fadd float %432, %433 %435 = fadd float %425, %430 %436 = fadd float %427, %431 %437 = fadd float %429, %434 %438 = fadd float %366, %366 %439 = fadd float %367, %367 %440 = fmul float %416, %438 %441 = fmul float %417, %439 %442 = fmul float %366, %417 %443 = fmul float %367, %416 %444 = fadd float %442, %443 %445 = fadd float %435, %440 %446 = fadd float %436, %441 %447 = fadd float %437, %444 %448 = bitcast float %140 to i32 %449 = bitcast float %141 to i32 %450 = insertelement <2 x i32> undef, i32 %448, i32 0 %451 = insertelement <2 x i32> %450, i32 %449, i32 1 %452 = bitcast <8 x i32> %91 to <32 x i8> %453 = bitcast <4 x i32> %93 to <16 x i8> %454 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %451, <32 x i8> %452, <16 x i8> %453, i32 2) %455 = extractelement <4 x float> %454, i32 0 %456 = extractelement <4 x float> %454, i32 1 %457 = extractelement <4 x float> %454, i32 2 %458 = extractelement <4 x float> %454, i32 3 %459 = bitcast float %140 to i32 %460 = bitcast float %141 to i32 %461 = insertelement <2 x i32> undef, i32 %459, i32 0 %462 = insertelement <2 x i32> %461, i32 %460, i32 1 %463 = bitcast <8 x i32> %95 to <32 x i8> %464 = bitcast <4 x i32> %97 to <16 x i8> %465 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %462, <32 x i8> %463, <16 x i8> %464, i32 2) %466 = extractelement <4 x float> %465, i32 0 %467 = fadd float %456, %456 %468 = fadd float %455, %455 %469 = fmul float %467, -1.000000e+00 %470 = fadd float %469, 1.000000e+00 %471 = fmul float %468, 1.000000e+00 %472 = fadd float %471, -1.000000e+00 %473 = fmul float %309, %470 %474 = fmul float %309, %472 %475 = fmul float %472, %309 %476 = fadd float %475, %419 %477 = fmul float %470, %309 %478 = fadd float %477, %421 %479 = fmul float %466, -2.000000e+00 %480 = fadd float %479, 1.000000e+00 %481 = fmul float %457, %313 %482 = fadd float %481, %445 %483 = fmul float %458, %313 %484 = fadd float %483, %446 %485 = fmul float %480, %313 %486 = fadd float %485, %447 %487 = fmul float %383, %474 %488 = fmul float %384, %473 %489 = fmul float %337, %473 %490 = fmul float %338, %474 %491 = fadd float %489, %490 %492 = fadd float %487, %482 %493 = fadd float %488, %484 %494 = fadd float %491, %486 %495 = fmul float %438, %474 %496 = fmul float %439, %473 %497 = fmul float %366, %473 %498 = fmul float %367, %474 %499 = fadd float %497, %498 %500 = fadd float %492, %495 %501 = fadd float %493, %496 %502 = fadd float %494, %499 %503 = fmul float %416, %474 %504 = fmul float %417, %473 %505 = fadd float %503, %503 %506 = fadd float %504, %504 %507 = fmul float %416, %473 %508 = fmul float %417, %474 %509 = fadd float %507, %508 %510 = fadd float %500, %505 %511 = fadd float %501, %506 %512 = fadd float %502, %509 %513 = fsub float -0.000000e+00, %476 %514 = fmul float %476, %513 %515 = fadd float %514, %510 %516 = fsub float -0.000000e+00, %478 %517 = fmul float %478, %516 %518 = fadd float %517, %511 %519 = fsub float -0.000000e+00, %478 %520 = fmul float %476, %519 %521 = fadd float %520, %512 %522 = fmul float %25, %476 %523 = fmul float %26, %478 %524 = fadd float %523, %522 %525 = fmul float %27, 1.000000e+00 %526 = fadd float %524, %525 %527 = call float @llvm.AMDIL.clamp.(float %526, float 0.000000e+00, float 1.000000e+00) %528 = fsub float -0.000000e+00, %142 %529 = fadd float %50, %528 %530 = fsub float -0.000000e+00, %143 %531 = fadd float %51, %530 %532 = fsub float -0.000000e+00, %144 %533 = fadd float %52, %532 %534 = fmul float %529, %529 %535 = fmul float %531, %531 %536 = fadd float %535, %534 %537 = fmul float %533, %533 %538 = fadd float %536, %537 %539 = fcmp uge float %538, 0x3E7AD7F2A0000000 %540 = select i1 %539, float %538, float 0x3E7AD7F2A0000000 %541 = call float @llvm.AMDGPU.rsq.clamped.f32(float %540) %542 = fmul float %541, %529 %543 = fmul float %541, %531 %544 = fmul float %541, %533 %545 = fsub float -0.000000e+00, %39 %546 = fmul float %529, %541 %547 = fadd float %546, %545 %548 = fsub float -0.000000e+00, %40 %549 = fmul float %531, %541 %550 = fadd float %549, %548 %551 = fsub float -0.000000e+00, %41 %552 = fmul float %533, %541 %553 = fadd float %552, %551 %554 = fmul float %42, %547 %555 = fadd float %554, %39 %556 = fmul float %42, %550 %557 = fadd float %556, %40 %558 = fmul float %42, %553 %559 = fadd float %558, %41 %560 = fmul float %555, %555 %561 = fmul float %557, %557 %562 = fadd float %561, %560 %563 = fmul float %559, %559 %564 = fadd float %562, %563 %565 = fcmp uge float %564, 0x3E7AD7F2A0000000 %566 = select i1 %565, float %564, float 0x3E7AD7F2A0000000 %567 = call float @llvm.AMDGPU.rsq.clamped.f32(float %566) %568 = fmul float %555, %567 %569 = fadd float %568, %25 %570 = fmul float %557, %567 %571 = fadd float %570, %26 %572 = fmul float %559, %567 %573 = fadd float %572, %27 %574 = fdiv float 1.000000e+00, %573 %575 = fsub float -0.000000e+00, %476 %576 = fmul float %569, %574 %577 = fadd float %576, %575 %578 = fsub float -0.000000e+00, %478 %579 = fmul float %571, %574 %580 = fadd float %579, %578 %581 = fdiv float 1.000000e+00, %34 %582 = fadd float %515, %581 %583 = fadd float %518, %581 %584 = fmul float %521, %521 %585 = fsub float -0.000000e+00, %584 %586 = fmul float %582, %583 %587 = fadd float %586, %585 %588 = fmul float %577, %577 %589 = fadd float %521, %521 %590 = fmul float %577, %589 %591 = fsub float -0.000000e+00, %590 %592 = fmul float %582, %580 %593 = fadd float %592, %591 %594 = fmul float %580, %593 %595 = fmul float %588, %583 %596 = fadd float %595, %594 %597 = fmul float %596, 5.000000e-01 %598 = fdiv float 1.000000e+00, %587 %599 = fmul float %598, %597 %600 = fsub float -0.000000e+00, %587 %601 = fcmp oge float %600, 0.000000e+00 %602 = sext i1 %601 to i32 %603 = bitcast i32 %602 to float %604 = bitcast float %603 to i32 %605 = icmp ne i32 %604, 0 %.167 = select i1 %605, float 1.000000e+00, float 0.000000e+00 %606 = fmul float %597, %598 %607 = fadd float %606, -1.600000e+01 %608 = fcmp oge float %607, 0.000000e+00 %609 = sext i1 %608 to i32 %610 = bitcast i32 %609 to float %611 = bitcast float %610 to i32 %612 = icmp ne i32 %611, 0 %temp64.0 = select i1 %612, float 1.000000e+00, float 0.000000e+00 %613 = fmul float %599, 0xBFF7154CA0000000 %614 = call float @llvm.AMDIL.exp.(float %613) %615 = fcmp uge float %587, 0x3E7AD7F2A0000000 %616 = select i1 %615, float %587, float 0x3E7AD7F2A0000000 %617 = call float @llvm.AMDGPU.rsq.clamped.f32(float %616) %618 = fmul float %614, %617 %619 = fmul float %581, 1.600000e+01 %620 = fadd float %619, %515 %621 = fmul float %581, 1.600000e+01 %622 = fadd float %621, %518 %623 = fsub float -0.000000e+00, %584 %624 = fmul float %620, %622 %625 = fadd float %624, %623 %626 = fsub float -0.000000e+00, %590 %627 = fmul float %620, %580 %628 = fadd float %627, %626 %629 = fmul float %580, %628 %630 = fmul float %588, %622 %631 = fadd float %630, %629 %632 = fmul float %631, 5.000000e-01 %633 = fdiv float 1.000000e+00, %625 %634 = fmul float %633, %632 %635 = fsub float -0.000000e+00, %625 %636 = fcmp oge float %635, 0.000000e+00 %637 = sext i1 %636 to i32 %638 = bitcast i32 %637 to float %639 = bitcast float %638 to i32 %640 = icmp ne i32 %639, 0 %.168 = select i1 %640, float 1.000000e+00, float 0.000000e+00 %641 = fmul float %632, %633 %642 = fadd float %641, -1.600000e+01 %643 = fcmp oge float %642, 0.000000e+00 %644 = sext i1 %643 to i32 %645 = bitcast i32 %644 to float %646 = bitcast float %645 to i32 %647 = icmp ne i32 %646, 0 %temp56.0 = select i1 %647, float 1.000000e+00, float 0.000000e+00 %648 = fadd float %temp56.0, %.168 %649 = fadd float %temp64.0, %.167 %650 = fmul float %634, 0xBFF7154CA0000000 %651 = call float @llvm.AMDIL.exp.(float %650) %652 = fcmp uge float %625, 0x3E7AD7F2A0000000 %653 = select i1 %652, float %625, float 0x3E7AD7F2A0000000 %654 = call float @llvm.AMDGPU.rsq.clamped.f32(float %653) %655 = fmul float %654, %651 %656 = fsub float -0.000000e+00, %43 %657 = fmul float %529, %541 %658 = fadd float %657, %656 %659 = fsub float -0.000000e+00, %44 %660 = fmul float %531, %541 %661 = fadd float %660, %659 %662 = fsub float -0.000000e+00, %45 %663 = fmul float %533, %541 %664 = fadd float %663, %662 %665 = fmul float %46, %658 %666 = fadd float %665, %43 %667 = fmul float %46, %661 %668 = fadd float %667, %44 %669 = fmul float %46, %664 %670 = fadd float %669, %45 %671 = fmul float %666, %666 %672 = fmul float %668, %668 %673 = fadd float %672, %671 %674 = fmul float %670, %670 %675 = fadd float %673, %674 %676 = fcmp uge float %675, 0x3E7AD7F2A0000000 %677 = select i1 %676, float %675, float 0x3E7AD7F2A0000000 %678 = call float @llvm.AMDGPU.rsq.clamped.f32(float %677) %679 = fmul float %666, %678 %680 = fadd float %679, %28 %681 = fmul float %668, %678 %682 = fadd float %681, %29 %683 = fmul float %670, %678 %684 = fadd float %683, %30 %685 = fdiv float 1.000000e+00, %684 %686 = fsub float -0.000000e+00, %476 %687 = fmul float %680, %685 %688 = fadd float %687, %686 %689 = fsub float -0.000000e+00, %478 %690 = fmul float %682, %685 %691 = fadd float %690, %689 %692 = fdiv float 1.000000e+00, %38 %693 = fadd float %692, %515 %694 = fadd float %692, %518 %695 = fsub float -0.000000e+00, %584 %696 = fmul float %693, %694 %697 = fadd float %696, %695 %698 = fmul float %688, %688 %699 = fmul float %589, %688 %700 = fsub float -0.000000e+00, %699 %701 = fmul float %693, %691 %702 = fadd float %701, %700 %703 = fmul float %691, %702 %704 = fmul float %698, %694 %705 = fadd float %704, %703 %706 = fmul float %705, 5.000000e-01 %707 = fdiv float 1.000000e+00, %697 %708 = fmul float %706, %707 %709 = fsub float -0.000000e+00, %697 %710 = fcmp oge float %709, 0.000000e+00 %711 = sext i1 %710 to i32 %712 = bitcast i32 %711 to float %713 = bitcast float %712 to i32 %714 = icmp ne i32 %713, 0 %.169 = select i1 %714, float 1.000000e+00, float 0.000000e+00 %715 = fmul float %706, %707 %716 = fadd float %715, -1.600000e+01 %717 = fcmp oge float %716, 0.000000e+00 %718 = sext i1 %717 to i32 %719 = bitcast i32 %718 to float %720 = bitcast float %719 to i32 %721 = icmp ne i32 %720, 0 %temp28.0 = select i1 %721, float 1.000000e+00, float 0.000000e+00 %722 = fadd float %temp28.0, %.169 %723 = fmul float %708, 0xBFF7154CA0000000 %724 = fmul float %655, 0x3F747AE140000000 %725 = fcmp uge float %697, 0x3E7AD7F2A0000000 %726 = select i1 %725, float %697, float 0x3E7AD7F2A0000000 %727 = call float @llvm.AMDGPU.rsq.clamped.f32(float %726) %728 = call float @llvm.AMDIL.exp.(float %723) %729 = fmul float %727, %728 %730 = fmul float %56, %143 %731 = fmul float %57, %143 %732 = fmul float %58, %143 %733 = fmul float %142, %53 %734 = fadd float %733, %730 %735 = fmul float %142, %54 %736 = fadd float %735, %731 %737 = fmul float %142, %55 %738 = fadd float %737, %732 %739 = fmul float %144, %59 %740 = fadd float %739, %734 %741 = fmul float %144, %60 %742 = fadd float %741, %736 %743 = fmul float %144, %61 %744 = fadd float %743, %738 %745 = fadd float %740, %62 %746 = fadd float %742, %63 %747 = fadd float %744, %64 %748 = fdiv float 1.000000e+00, %747 %749 = fmul float %748, %745 %750 = fmul float %748, %746 %751 = fmul float %749, 5.000000e-01 %752 = fadd float %751, -5.000000e-01 %753 = fmul float %750, -5.000000e-01 %754 = fadd float %753, -5.000000e-01 %755 = fmul float %515, 6.000000e+00 %756 = fadd float %755, %752 %757 = fmul float %518, 6.000000e+00 %758 = fadd float %757, %754 %759 = fmul float %697, 2.000000e+00 %760 = fadd float %759, %756 %761 = fmul float %697, 2.000000e+00 %762 = fadd float %761, %758 %763 = fmul float %697, 2.000000e+00 %764 = fadd float %763, %756 %765 = fmul float %697, -2.000000e+00 %766 = fadd float %765, %758 %767 = bitcast float %764 to i32 %768 = bitcast float %766 to i32 %769 = insertelement <2 x i32> undef, i32 %767, i32 0 %770 = insertelement <2 x i32> %769, i32 %768, i32 1 %771 = bitcast <8 x i32> %99 to <32 x i8> %772 = bitcast <4 x i32> %101 to <16 x i8> %773 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %770, <32 x i8> %771, <16 x i8> %772, i32 2) %774 = extractelement <4 x float> %773, i32 0 %775 = extractelement <4 x float> %773, i32 1 %776 = extractelement <4 x float> %773, i32 2 %777 = extractelement <4 x float> %773, i32 3 %778 = fmul float %774, 2.500000e-01 %779 = fmul float %775, 2.500000e-01 %780 = fmul float %776, 2.500000e-01 %781 = fmul float %777, 2.500000e-01 %782 = bitcast float %760 to i32 %783 = bitcast float %762 to i32 %784 = insertelement <2 x i32> undef, i32 %782, i32 0 %785 = insertelement <2 x i32> %784, i32 %783, i32 1 %786 = bitcast <8 x i32> %99 to <32 x i8> %787 = bitcast <4 x i32> %101 to <16 x i8> %788 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %785, <32 x i8> %786, <16 x i8> %787, i32 2) %789 = extractelement <4 x float> %788, i32 0 %790 = extractelement <4 x float> %788, i32 1 %791 = extractelement <4 x float> %788, i32 2 %792 = extractelement <4 x float> %788, i32 3 %793 = fmul float %789, 2.500000e-01 %794 = fadd float %793, %778 %795 = fmul float %790, 2.500000e-01 %796 = fadd float %795, %779 %797 = fmul float %791, 2.500000e-01 %798 = fadd float %797, %780 %799 = fmul float %792, 2.500000e-01 %800 = fadd float %799, %781 %801 = fmul float %697, -2.000000e+00 %802 = fadd float %801, %756 %803 = fmul float %697, 6.000000e+00 %804 = fadd float %803, %758 %805 = bitcast float %802 to i32 %806 = bitcast float %804 to i32 %807 = insertelement <2 x i32> undef, i32 %805, i32 0 %808 = insertelement <2 x i32> %807, i32 %806, i32 1 %809 = bitcast <8 x i32> %99 to <32 x i8> %810 = bitcast <4 x i32> %101 to <16 x i8> %811 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %808, <32 x i8> %809, <16 x i8> %810, i32 2) %812 = extractelement <4 x float> %811, i32 0 %813 = extractelement <4 x float> %811, i32 1 %814 = extractelement <4 x float> %811, i32 2 %815 = extractelement <4 x float> %811, i32 3 %816 = fmul float %812, 2.500000e-01 %817 = fadd float %816, %794 %818 = fmul float %813, 2.500000e-01 %819 = fadd float %818, %796 %820 = fmul float %814, 2.500000e-01 %821 = fadd float %820, %798 %822 = fmul float %815, 2.500000e-01 %823 = fadd float %822, %800 %824 = fmul float %697, -2.000000e+00 %825 = fadd float %824, %756 %826 = fmul float %697, -2.000000e+00 %827 = fadd float %826, %758 %828 = bitcast float %825 to i32 %829 = bitcast float %827 to i32 %830 = insertelement <2 x i32> undef, i32 %828, i32 0 %831 = insertelement <2 x i32> %830, i32 %829, i32 1 %832 = bitcast <8 x i32> %99 to <32 x i8> %833 = bitcast <4 x i32> %101 to <16 x i8> %834 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %831, <32 x i8> %832, <16 x i8> %833, i32 2) %835 = extractelement <4 x float> %834, i32 0 %836 = extractelement <4 x float> %834, i32 1 %837 = extractelement <4 x float> %834, i32 2 %838 = extractelement <4 x float> %834, i32 3 %839 = fmul float %835, 2.500000e-01 %840 = fadd float %839, %817 %841 = fmul float %836, 2.500000e-01 %842 = fadd float %841, %819 %843 = fmul float %837, 2.500000e-01 %844 = fadd float %843, %821 %845 = fmul float %838, 2.500000e-01 %846 = fadd float %845, %823 %847 = call float @fabs(float %840) %848 = call float @llvm.log2.f32(float %847) %849 = call float @fabs(float %842) %850 = call float @llvm.log2.f32(float %849) %851 = call float @fabs(float %844) %852 = call float @llvm.log2.f32(float %851) %853 = fmul float %848, 0x40019999A0000000 %854 = fmul float %850, 0x40019999A0000000 %855 = fmul float %852, 0x40019999A0000000 %856 = call float @llvm.AMDIL.exp.(float %853) %857 = call float @llvm.AMDIL.exp.(float %854) %858 = call float @llvm.AMDIL.exp.(float %855) %859 = fmul float %476, %542 %860 = fmul float %478, %543 %861 = fadd float %860, %859 %862 = fmul float 1.000000e+00, %544 %863 = fadd float %861, %862 %864 = fsub float -0.000000e+00, %863 %865 = fadd float %864, 1.000000e+00 %866 = fmul float %865, %865 %867 = fmul float %866, %866 %868 = fmul float %865, %867 %869 = fmul float %618, %31 %870 = fmul float %618, %32 %871 = fmul float %618, %33 %872 = fsub float -0.000000e+00, %649 %873 = fsub float -0.000000e+00, %649 %874 = fsub float -0.000000e+00, %649 %875 = fsub float -0.000000e+00, %649 %876 = fcmp oge float %872, 0.000000e+00 %877 = sext i1 %876 to i32 %878 = bitcast i32 %877 to float %879 = bitcast float %878 to i32 %880 = icmp ne i32 %879, 0 %.170 = select i1 %880, float %869, float 0.000000e+00 %881 = fcmp oge float %873, 0.000000e+00 %882 = sext i1 %881 to i32 %883 = bitcast i32 %882 to float %884 = bitcast float %883 to i32 %885 = icmp ne i32 %884, 0 %temp76.0 = select i1 %885, float %870, float 0.000000e+00 %886 = fcmp oge float %874, 0.000000e+00 %887 = sext i1 %886 to i32 %888 = bitcast i32 %887 to float %889 = bitcast float %888 to i32 %890 = icmp ne i32 %889, 0 %.171 = select i1 %890, float %871, float 0.000000e+00 %891 = fcmp oge float %875, 0.000000e+00 %892 = sext i1 %891 to i32 %893 = bitcast i32 %892 to float %894 = bitcast float %893 to i32 %895 = icmp ne i32 %894, 0 %896 = fmul float %729, %35 %897 = fmul float %729, %36 %898 = fmul float %729, %37 %899 = fsub float -0.000000e+00, %722 %900 = fsub float -0.000000e+00, %722 %901 = fsub float -0.000000e+00, %722 %902 = fsub float -0.000000e+00, %722 %903 = fcmp oge float %899, 0.000000e+00 %904 = sext i1 %903 to i32 %905 = bitcast i32 %904 to float %906 = bitcast float %905 to i32 %907 = icmp ne i32 %906, 0 %908 = fcmp oge float %900, 0.000000e+00 %909 = sext i1 %908 to i32 %910 = bitcast i32 %909 to float %911 = bitcast float %910 to i32 %912 = icmp ne i32 %911, 0 %temp68.0 = select i1 %912, float %896, float 0.000000e+00 %913 = fcmp oge float %901, 0.000000e+00 %914 = sext i1 %913 to i32 %915 = bitcast i32 %914 to float %916 = bitcast float %915 to i32 %917 = icmp ne i32 %916, 0 %.172 = select i1 %917, float %897, float 0.000000e+00 %918 = fcmp oge float %902, 0.000000e+00 %919 = sext i1 %918 to i32 %920 = bitcast i32 %919 to float %921 = bitcast float %920 to i32 %922 = icmp ne i32 %921, 0 %temp28.1 = select i1 %922, float %898, float 0.000000e+00 %923 = fadd float %temp68.0, %.170 %924 = fadd float %.172, %temp76.0 %925 = fadd float %temp28.1, %.171 %926 = fmul float %923, %868 %927 = fadd float %926, %252 %928 = fmul float %924, %868 %929 = fadd float %928, %253 %930 = fmul float %925, %868 %931 = fadd float %930, %254 %932 = fmul float %252, %31 %933 = fmul float %253, %32 %934 = fmul float %254, %33 %935 = fmul float %724, %932 %936 = fmul float %724, %933 %937 = fmul float %724, %934 %938 = fsub float -0.000000e+00, %648 %939 = fsub float -0.000000e+00, %648 %940 = fsub float -0.000000e+00, %648 %941 = fsub float -0.000000e+00, %648 %942 = fcmp oge float %938, 0.000000e+00 %943 = sext i1 %942 to i32 %944 = bitcast i32 %943 to float %945 = bitcast float %944 to i32 %946 = icmp ne i32 %945, 0 %.173 = select i1 %946, float %935, float 0.000000e+00 %947 = fcmp oge float %939, 0.000000e+00 %948 = sext i1 %947 to i32 %949 = bitcast i32 %948 to float %950 = bitcast float %949 to i32 %951 = icmp ne i32 %950, 0 %952 = fcmp oge float %940, 0.000000e+00 %953 = sext i1 %952 to i32 %954 = bitcast i32 %953 to float %955 = bitcast float %954 to i32 %956 = icmp ne i32 %955, 0 %temp56.2 = select i1 %956, float %936, float 0.000000e+00 %957 = fcmp oge float %941, 0.000000e+00 %958 = sext i1 %957 to i32 %959 = bitcast i32 %958 to float %960 = bitcast float %959 to i32 %961 = icmp ne i32 %960, 0 %.174 = select i1 %961, float %937, float 0.000000e+00 %962 = fmul float %927, %527 %963 = fadd float %962, %.173 %964 = fmul float %929, %527 %965 = fadd float %964, %temp56.2 %966 = fmul float %931, %527 %967 = fadd float %966, %.174 %968 = fmul float %856, %89 %969 = fadd float %968, %963 %970 = fmul float %857, %89 %971 = fadd float %970, %965 %972 = fmul float %858, %89 %973 = fadd float %972, %967 %974 = fmul float %708, %708 %975 = fmul float %708, %974 %976 = fmul float %232, %975 %977 = fmul float %976, %252 %978 = fadd float %977, %969 %979 = fmul float %976, %253 %980 = fadd float %979, %971 %981 = fmul float %976, %254 %982 = fadd float %981, %973 %983 = fmul float %68, %143 %984 = fmul float %69, %143 %985 = fmul float %70, %143 %986 = fmul float %142, %65 %987 = fadd float %986, %983 %988 = fmul float %142, %66 %989 = fadd float %988, %984 %990 = fmul float %142, %67 %991 = fadd float %990, %985 %992 = fmul float %144, %71 %993 = fadd float %992, %987 %994 = fmul float %144, %72 %995 = fadd float %994, %989 %996 = fmul float %144, %73 %997 = fadd float %996, %991 %998 = fadd float %993, %74 %999 = fadd float %995, %75 %1000 = fadd float %997, %76 %1001 = fdiv float 1.000000e+00, %1000 %1002 = fmul float %1001, %998 %1003 = fmul float %1001, %999 %1004 = fmul float %1002, 5.000000e-01 %1005 = fadd float %1004, -5.000000e-01 %1006 = fmul float %1003, -5.000000e-01 %1007 = fadd float %1006, -5.000000e-01 %1008 = fmul float %1005, 1.000000e+00 %1009 = fadd float %1008, 0.000000e+00 %1010 = fmul float %1007, -1.000000e+00 %1011 = fadd float %1010, 1.000000e+00 %1012 = bitcast float %1009 to i32 %1013 = bitcast float %1011 to i32 %1014 = insertelement <2 x i32> undef, i32 %1012, i32 0 %1015 = insertelement <2 x i32> %1014, i32 %1013, i32 1 %1016 = bitcast <8 x i32> %131 to <32 x i8> %1017 = bitcast <4 x i32> %133 to <16 x i8> %1018 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1015, <32 x i8> %1016, <16 x i8> %1017, i32 2) %1019 = extractelement <4 x float> %1018, i32 0 %1020 = call float @llvm.AMDIL.clamp.(float %1019, float 0.000000e+00, float 1.000000e+00) %1021 = fmul float %1020, %85 %1022 = fmul float %1021, %978 %1023 = fmul float %1021, %980 %1024 = fmul float %1021, %982 %1025 = fmul float %1022, 0xBFE570A3E0000000 %1026 = fadd float %1025, %978 %1027 = fmul float %1023, 0xBFE570A3E0000000 %1028 = fadd float %1027, %980 %1029 = fmul float %1024, 0xBFE570A3E0000000 %1030 = fadd float %1029, %982 %1031 = fmul float %846, %255 %1032 = fadd float %1031, %255 %1033 = fmul float %846, %255 %1034 = fadd float %1033, %255 %1035 = fmul float %846, %255 %1036 = fadd float %1035, %255 %1037 = fmul float %846, %255 %1038 = fadd float %1037, %255 %1039 = call float @llvm.AMDIL.clamp.(float %1032, float 0.000000e+00, float 1.000000e+00) %1040 = call float @llvm.AMDIL.clamp.(float %1034, float 0.000000e+00, float 1.000000e+00) %1041 = call float @llvm.AMDIL.clamp.(float %1036, float 0.000000e+00, float 1.000000e+00) %1042 = call float @llvm.AMDIL.clamp.(float %1038, float 0.000000e+00, float 1.000000e+00) %1043 = call float @fabs(float %1026) %1044 = call float @llvm.log2.f32(float %1043) %1045 = call float @fabs(float %1028) %1046 = call float @llvm.log2.f32(float %1045) %1047 = call float @fabs(float %1030) %1048 = call float @llvm.log2.f32(float %1047) %1049 = fmul float %1044, 0x3FDD1743E0000000 %1050 = fmul float %1046, 0x3FDD1743E0000000 %1051 = fmul float %1048, 0x3FDD1743E0000000 %1052 = call float @llvm.AMDIL.exp.(float %1049) %1053 = call float @llvm.AMDIL.exp.(float %1050) %1054 = call float @llvm.AMDIL.exp.(float %1051) %1055 = fmul float %147, 1.000000e+00 %1056 = fadd float %1055, 0.000000e+00 %1057 = fmul float %148, -1.000000e+00 %1058 = fadd float %1057, 1.000000e+00 %1059 = bitcast float %1056 to i32 %1060 = bitcast float %1058 to i32 %1061 = insertelement <2 x i32> undef, i32 %1059, i32 0 %1062 = insertelement <2 x i32> %1061, i32 %1060, i32 1 %1063 = bitcast <8 x i32> %119 to <32 x i8> %1064 = bitcast <4 x i32> %121 to <16 x i8> %1065 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1062, <32 x i8> %1063, <16 x i8> %1064, i32 2) %1066 = extractelement <4 x float> %1065, i32 0 %1067 = extractelement <4 x float> %1065, i32 1 %1068 = extractelement <4 x float> %1065, i32 2 %1069 = extractelement <4 x float> %1065, i32 3 %1070 = fsub float -0.000000e+00, %1069 %1071 = fadd float %1070, 1.000000e+00 %1072 = fmul float %1052, %1071 %1073 = fadd float %1072, %1066 %1074 = fmul float %1053, %1071 %1075 = fadd float %1074, %1067 %1076 = fmul float %1054, %1071 %1077 = fadd float %1076, %1068 %1078 = fadd float %1073, 0xBFD99999A0000000 %1079 = fadd float %1075, 0xBFD99999A0000000 %1080 = fadd float %1077, 0xBFD99999A0000000 %1081 = fmul float %162, %1078 %1082 = fadd float %1081, 0x3FD99999A0000000 %1083 = fmul float %162, %1079 %1084 = fadd float %1083, 0x3FD99999A0000000 %1085 = fmul float %162, %1080 %1086 = fadd float %1085, 0x3FD99999A0000000 %1087 = bitcast float %1082 to i32 %1088 = bitcast float %1084 to i32 %1089 = bitcast float %1086 to i32 %1090 = insertelement <4 x i32> undef, i32 %1087, i32 0 %1091 = insertelement <4 x i32> %1090, i32 %1088, i32 1 %1092 = insertelement <4 x i32> %1091, i32 %1089, i32 2 %1093 = insertelement <4 x i32> %1092, i32 undef, i32 3 %1094 = bitcast <8 x i32> %127 to <32 x i8> %1095 = bitcast <4 x i32> %129 to <16 x i8> %1096 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1093, <32 x i8> %1094, <16 x i8> %1095, i32 3) %1097 = extractelement <4 x float> %1096, i32 0 %1098 = extractelement <4 x float> %1096, i32 1 %1099 = extractelement <4 x float> %1096, i32 2 %1100 = fsub float -0.000000e+00, %162 %1101 = fmul float %163, %1100 %1102 = fadd float %1101, %162 %1103 = call float @llvm.AMDGPU.lrp(float %1102, float %1097, float %1082) %1104 = call float @llvm.AMDGPU.lrp(float %1102, float %1098, float %1084) %1105 = call float @llvm.AMDGPU.lrp(float %1102, float %1099, float %1086) %1106 = fdiv float 1.000000e+00, %146 %1107 = fmul float %1106, %145 %1108 = fmul float %1107, 5.000000e-01 %1109 = fadd float %1108, 5.000000e-01 %1110 = bitcast float %1109 to i32 %1111 = bitcast float %49 to i32 %1112 = insertelement <2 x i32> undef, i32 %1110, i32 0 %1113 = insertelement <2 x i32> %1112, i32 %1111, i32 1 %1114 = bitcast <8 x i32> %115 to <32 x i8> %1115 = bitcast <4 x i32> %117 to <16 x i8> %1116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1113, <32 x i8> %1114, <16 x i8> %1115, i32 2) %1117 = extractelement <4 x float> %1116, i32 0 %1118 = fsub float -0.000000e+00, %50 %1119 = fadd float %1118, %142 %1120 = fsub float -0.000000e+00, %51 %1121 = fadd float %1120, %143 %1122 = fsub float -0.000000e+00, %52 %1123 = fadd float %1122, %144 %1124 = fmul float %1119, %1119 %1125 = fmul float %1121, %1121 %1126 = fadd float %1125, %1124 %1127 = fmul float %1123, %1123 %1128 = fadd float %1126, %1127 %1129 = fmul float %1128, %48 %1130 = fmul float %1123, %47 %1131 = fmul float %1130, 0x3FF7154CA0000000 %1132 = call float @llvm.AMDIL.exp.(float %1131) %1133 = fsub float -0.000000e+00, %1132 %1134 = fadd float %1133, 1.000000e+00 %1135 = fmul float %1134, %1129 %1136 = fdiv float 1.000000e+00, %1123 %1137 = fmul float %1136, %1135 %1138 = fmul float %1137, 0x3FF7154CA0000000 %1139 = call float @llvm.AMDIL.exp.(float %1138) %1140 = call float @llvm.AMDIL.clamp.(float %1139, float 0.000000e+00, float 1.000000e+00) %1141 = fsub float -0.000000e+00, %1140 %1142 = fadd float %1141, 1.000000e+00 %1143 = fmul float %1142, %1117 %1144 = fsub float -0.000000e+00, %1103 %1145 = fadd float %1144, %86 %1146 = fsub float -0.000000e+00, %1104 %1147 = fadd float %1146, %87 %1148 = fsub float -0.000000e+00, %1105 %1149 = fadd float %1148, %88 %1150 = fmul float %1143, %1145 %1151 = fadd float %1150, %1103 %1152 = fmul float %1143, %1147 %1153 = fadd float %1152, %1104 %1154 = fmul float %1143, %1149 %1155 = fadd float %1154, %1105 %1156 = call i32 @llvm.SI.packf16(float %1151, float %1153) %1157 = bitcast i32 %1156 to float %1158 = call i32 @llvm.SI.packf16(float %1155, float %1042) %1159 = bitcast i32 %1158 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1157, float %1159, float %1157, float %1159) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v11, v0, 0, 6, [m0] ; C82C1800 V_INTERP_P2_F32 v11, [v11], v1, 0, 6, [m0] ; C82D1801 V_INTERP_P1_F32 v13, v0, 1, 6, [m0] ; C8341900 V_INTERP_P2_F32 v13, [v13], v1, 1, 6, [m0] ; C8351901 V_SUB_F32_e32 v12, 1.000000e+00, v13 ; 08181AF2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x20 ; C0860520 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x40 ; C0C80740 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800F00 0064020B V_MOV_B32_e32 v6, 0x80000000 ; 7E0C02FF 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_OR_B32_e32 v7, v2, v6 ; 380E0D02 V_CMP_GE_F32_e64 s[0:1], v7, 0.000000e+00, 0, 0 ; D00C0000 00010107 V_CNDMASK_B32_e64 v6, v6, -1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000006 0001E706 V_CMP_LT_F32_e64 s[0:1], v6, 0.000000e+00, 0, 0 ; D0020000 00010106 V_CNDMASK_B32_e64 v6, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000006 00018280 V_OR_B32_e32 v6, v6, v6 ; 380C0D06 V_CMP_NE_I32_e64 s[0:1], v6, 0, 0, 0 ; D10A0000 00010106 V_INTERP_P1_F32 v24, v0, 3, 6, [m0] ; C8601B00 V_INTERP_P2_F32 v24, [v24], v1, 3, 6, [m0] ; C8611B01 V_INTERP_P1_F32 v23, v0, 2, 6, [m0] ; C85C1A00 V_INTERP_P2_F32 v23, [v23], v1, 2, 6, [m0] ; C85D1A01 V_INTERP_P1_F32 v6, v0, 2, 5, [m0] ; C8181600 V_INTERP_P2_F32 v6, [v6], v1, 2, 5, [m0] ; C8191601 V_INTERP_P1_F32 v7, v0, 1, 5, [m0] ; C81C1500 V_INTERP_P2_F32 v7, [v7], v1, 1, 5, [m0] ; C81D1501 V_INTERP_P1_F32 v8, v0, 2, 4, [m0] ; C8201200 V_INTERP_P2_F32 v8, [v8], v1, 2, 4, [m0] ; C8211201 V_INTERP_P1_F32 v9, v0, 1, 4, [m0] ; C8241100 V_INTERP_P2_F32 v9, [v9], v1, 1, 4, [m0] ; C8251101 V_INTERP_P1_F32 v10, v0, 0, 4, [m0] ; C8281000 V_INTERP_P2_F32 v10, [v10], v1, 0, 4, [m0] ; C8291001 V_INTERP_P1_F32 v59, v0, 1, 3, [m0] ; C8EC0D00 V_INTERP_P2_F32 v59, [v59], v1, 1, 3, [m0] ; C8ED0D01 V_INTERP_P1_F32 v58, v0, 0, 3, [m0] ; C8E80C00 V_INTERP_P2_F32 v58, [v58], v1, 0, 3, [m0] ; C8E90C01 V_INTERP_P1_F32 v61, v0, 1, 2, [m0] ; C8F40900 V_INTERP_P2_F32 v61, [v61], v1, 1, 2, [m0] ; C8F50901 V_INTERP_P1_F32 v60, v0, 0, 2, [m0] ; C8F00800 V_INTERP_P2_F32 v60, [v60], v1, 0, 2, [m0] ; C8F10801 V_INTERP_P1_F32 v64, v0, 1, 1, [m0] ; C9000500 V_INTERP_P2_F32 v64, [v64], v1, 1, 1, [m0] ; C9010501 V_INTERP_P1_F32 v63, v0, 0, 1, [m0] ; C8FC0400 V_INTERP_P2_F32 v63, [v63], v1, 0, 1, [m0] ; C8FD0401 V_INTERP_P1_F32 v73, v0, 1, 0, [m0] ; C9240100 V_INTERP_P2_F32 v73, [v73], v1, 1, 0, [m0] ; C9250101 V_INTERP_P1_F32 v72, v0, 0, 0, [m0] ; C9200000 V_INTERP_P2_F32 v72, [v72], v1, 0, 0, [m0] ; C9210001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x5c ; C201095C S_BUFFER_LOAD_DWORD s3, s[8:11], 0x5a ; C201895A S_BUFFER_LOAD_DWORD s12, s[8:11], 0x59 ; C2060959 S_BUFFER_LOAD_DWORD s13, s[8:11], 0x58 ; C2068958 S_BUFFER_LOAD_DWORD s14, s[8:11], 0x54 ; C2070954 S_BUFFER_LOAD_DWORD s15, s[8:11], 0x51 ; C2078951 S_BUFFER_LOAD_DWORD s16, s[8:11], 0x50 ; C2080950 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x4d ; C208894D S_BUFFER_LOAD_DWORD s18, s[8:11], 0x4c ; C209094C S_BUFFER_LOAD_DWORD s19, s[8:11], 0x49 ; C2098949 S_BUFFER_LOAD_DWORD s20, s[8:11], 0x48 ; C20A0948 S_BUFFER_LOAD_DWORD s21, s[8:11], 0x45 ; C20A8945 S_BUFFER_LOAD_DWORD s22, s[8:11], 0x44 ; C20B0944 S_BUFFER_LOAD_DWORD s23, s[8:11], 0x43 ; C20B8943 S_BUFFER_LOAD_DWORD s24, s[8:11], 0x41 ; C20C0941 S_BUFFER_LOAD_DWORD s25, s[8:11], 0x40 ; C20C8940 S_BUFFER_LOAD_DWORD s26, s[8:11], 0x3f ; C20D093F S_BUFFER_LOAD_DWORD s27, s[8:11], 0x3d ; C20D893D S_BUFFER_LOAD_DWORD s28, s[8:11], 0x3c ; C20E093C S_BUFFER_LOAD_DWORD s29, s[8:11], 0x3b ; C20E893B S_BUFFER_LOAD_DWORD s30, s[8:11], 0x39 ; C20F0939 S_BUFFER_LOAD_DWORD s31, s[8:11], 0x38 ; C20F8938 S_BUFFER_LOAD_DWORD s32, s[8:11], 0x37 ; C2100937 S_BUFFER_LOAD_DWORD s33, s[8:11], 0x35 ; C2108935 S_BUFFER_LOAD_DWORD s34, s[8:11], 0x34 ; C2110934 S_BUFFER_LOAD_DWORD s35, s[8:11], 0x33 ; C2118933 S_BUFFER_LOAD_DWORD s36, s[8:11], 0x31 ; C2120931 S_BUFFER_LOAD_DWORD s37, s[8:11], 0x30 ; C2128930 S_BUFFER_LOAD_DWORD s38, s[8:11], 0x2f ; C213092F S_BUFFER_LOAD_DWORD s39, s[8:11], 0x2d ; C213892D S_BUFFER_LOAD_DWORD s40, s[8:11], 0x2c ; C214092C S_BUFFER_LOAD_DWORD s41, s[8:11], 0x2b ; C214892B S_BUFFER_LOAD_DWORD s42, s[8:11], 0x29 ; C2150929 S_BUFFER_LOAD_DWORD s43, s[8:11], 0x28 ; C2158928 S_BUFFER_LOAD_DWORD s44, s[8:11], 0x27 ; C2160927 S_BUFFER_LOAD_DWORD s45, s[8:11], 0x25 ; C2168925 S_BUFFER_LOAD_DWORD s46, s[8:11], 0x24 ; C2170924 S_BUFFER_LOAD_DWORD s47, s[8:11], 0x22 ; C2178922 S_BUFFER_LOAD_DWORD s48, s[8:11], 0x21 ; C2180921 S_BUFFER_LOAD_DWORD s49, s[8:11], 0x20 ; C2188920 S_BUFFER_LOAD_DWORD s50, s[8:11], 0x1f ; C219091F S_BUFFER_LOAD_DWORD s51, s[8:11], 0x1d ; C219891D S_BUFFER_LOAD_DWORD s52, s[8:11], 0x1c ; C21A091C S_BUFFER_LOAD_DWORD s53, s[8:11], 0x1b ; C21A891B S_BUFFER_LOAD_DWORD s54, s[8:11], 0x1a ; C21B091A S_BUFFER_LOAD_DWORD s55, s[8:11], 0x19 ; C21B8919 S_BUFFER_LOAD_DWORD s56, s[8:11], 0x18 ; C21C0918 S_BUFFER_LOAD_DWORD s57, s[8:11], 0x17 ; C21C8917 S_BUFFER_LOAD_DWORD s58, s[8:11], 0x16 ; C21D0916 S_BUFFER_LOAD_DWORD s59, s[8:11], 0x15 ; C21D8915 S_BUFFER_LOAD_DWORD s60, s[8:11], 0x14 ; C21E0914 S_BUFFER_LOAD_DWORD s61, s[8:11], 0x13 ; C21E8913 S_BUFFER_LOAD_DWORD s62, s[8:11], 0x12 ; C21F0912 S_BUFFER_LOAD_DWORD s63, s[8:11], 0x11 ; C21F8911 S_BUFFER_LOAD_DWORD s64, s[8:11], 0x10 ; C2200910 S_BUFFER_LOAD_DWORD s65, s[8:11], 0xf ; C220890F S_BUFFER_LOAD_DWORD s66, s[8:11], 0xe ; C221090E S_BUFFER_LOAD_DWORD s67, s[8:11], 0xd ; C221890D S_BUFFER_LOAD_DWORD s68, s[8:11], 0xc ; C222090C S_BUFFER_LOAD_DWORD s69, s[8:11], 0xa ; C222890A S_BUFFER_LOAD_DWORD s70, s[8:11], 0x9 ; C2230909 S_BUFFER_LOAD_DWORD s71, s[8:11], 0x8 ; C2238908 S_BUFFER_LOAD_DWORD s72, s[8:11], 0x6 ; C2240906 S_BUFFER_LOAD_DWORD s73, s[8:11], 0x5 ; C2248905 S_BUFFER_LOAD_DWORD s74, s[8:11], 0x4 ; C2250904 S_BUFFER_LOAD_DWORD s8, s[8:11], 0x0 ; C2040900 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v17, s2 ; 7E220202 V_MOV_B32_e32 v0, s3 ; 7E000203 V_MOV_B32_e32 v14, s12 ; 7E1C020C V_MOV_B32_e32 v1, s13 ; 7E02020D V_MOV_B32_e32 v27, s14 ; 7E36020E V_MOV_B32_e32 v80, s15 ; 7EA0020F V_MOV_B32_e32 v82, s16 ; 7EA40210 V_MOV_B32_e32 v81, s17 ; 7EA20211 V_MOV_B32_e32 v85, s18 ; 7EAA0212 V_MOV_B32_e32 v83, s19 ; 7EA60213 V_MOV_B32_e32 v86, s20 ; 7EAC0214 V_MOV_B32_e32 v84, s21 ; 7EA80215 V_MOV_B32_e32 v87, s22 ; 7EAE0216 V_MOV_B32_e32 v32, s23 ; 7E400217 V_MOV_B32_e32 v28, s24 ; 7E380218 V_MOV_B32_e32 v36, s25 ; 7E480219 V_MOV_B32_e32 v34, s26 ; 7E44021A V_MOV_B32_e32 v30, s27 ; 7E3C021B V_MOV_B32_e32 v38, s28 ; 7E4C021C V_MOV_B32_e32 v37, s29 ; 7E4A021D V_MOV_B32_e32 v33, s30 ; 7E42021E V_MOV_B32_e32 v40, s31 ; 7E50021F V_MOV_B32_e32 v39, s32 ; 7E4E0220 V_MOV_B32_e32 v35, s33 ; 7E460221 V_MOV_B32_e32 v41, s34 ; 7E520222 V_MOV_B32_e32 v45, s35 ; 7E5A0223 V_MOV_B32_e32 v48, s36 ; 7E600224 V_MOV_B32_e32 v42, s37 ; 7E540225 V_MOV_B32_e32 v47, s38 ; 7E5E0226 V_MOV_B32_e32 v50, s39 ; 7E640227 V_MOV_B32_e32 v43, s40 ; 7E560228 V_MOV_B32_e32 v49, s41 ; 7E620229 V_MOV_B32_e32 v52, s42 ; 7E68022A V_MOV_B32_e32 v44, s43 ; 7E58022B V_MOV_B32_e32 v51, s44 ; 7E66022C V_MOV_B32_e32 v53, s45 ; 7E6A022D V_MOV_B32_e32 v46, s46 ; 7E5C022E V_MOV_B32_e32 v18, s47 ; 7E24022F V_MOV_B32_e32 v22, s48 ; 7E2C0230 V_MOV_B32_e32 v21, s49 ; 7E2A0231 V_MOV_B32_e32 v16, s50 ; 7E200232 V_MOV_B32_e32 v20, s51 ; 7E280233 V_MOV_B32_e32 v19, s52 ; 7E260234 V_MOV_B32_e32 v65, s53 ; 7E820235 V_MOV_B32_e32 v67, s54 ; 7E860236 V_MOV_B32_e32 v71, s55 ; 7E8E0237 V_MOV_B32_e32 v69, s56 ; 7E8A0238 V_MOV_B32_e32 v75, s57 ; 7E960239 V_MOV_B32_e32 v76, s58 ; 7E98023A V_MOV_B32_e32 v78, s59 ; 7E9C023B V_MOV_B32_e32 v77, s60 ; 7E9A023C V_MOV_B32_e32 v74, s61 ; 7E94023D V_MOV_B32_e32 v62, s62 ; 7E7C023E V_MOV_B32_e32 v31, s63 ; 7E3E023F V_MOV_B32_e32 v26, s64 ; 7E340240 V_MOV_B32_e32 v79, s65 ; 7E9E0241 V_MOV_B32_e32 v57, s66 ; 7E720242 V_MOV_B32_e32 v29, s67 ; 7E3A0243 V_MOV_B32_e32 v25, s68 ; 7E320244 V_MOV_B32_e32 v68, s69 ; 7E880245 V_MOV_B32_e32 v66, s70 ; 7E840246 V_MOV_B32_e32 v70, s71 ; 7E8C0247 V_MOV_B32_e32 v54, s72 ; 7E6C0248 V_MOV_B32_e32 v55, s73 ; 7E6E0249 V_MOV_B32_e32 v56, s74 ; 7E70024A V_MOV_B32_e32 v88, s8 ; 7EB00208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MUL_F32_e32 v86, v86, v9 ; 10AC1356 V_MAD_F32 v86, v10, v87, v86, 0, 0 ; D2820056 055AAF0A V_MAD_F32 v85, v8, v85, v86, 0, 0 ; D2820055 055AAB08 V_ADD_F32_e32 v85, v85, v82 ; 06AAA555 V_MUL_F32_e32 v82, v83, v9 ; 10A41353 V_MAD_F32 v82, v10, v84, v82, 0, 0 ; D2820052 054AA90A V_MAD_F32 v81, v8, v81, v82, 0, 0 ; D2820051 054AA308 V_ADD_F32_e32 v80, v81, v80 ; 06A0A151 V_SUB_F32_e32 v86, 1.000000e+00, v80 ; 08ACA0F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[80:81], 3, 0, 0, 0, 0, 0, 0, 0, v[85:86], s[8:15], s[0:3] ; F0800300 00025055 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v82, v81, v81 ; 06A4A351 V_ADD_F32_e32 v82, -1.000000e+00, v82 ; 06A4A4F3 V_ADD_F32_e32 v80, v80, v80 ; 06A0A150 V_ADD_F32_e32 v80, -1.000000e+00, v80 ; 06A0A0F3 V_SUB_F32_e32 v81, v80, v82 ; 08A2A550 V_ADD_F32_e32 v83, 2.000000e+00, v81 ; 06A6A2F4 V_MUL_F32_e32 v83, v83, v83 ; 10A6A753 V_MOV_B32_e32 v84, 0x80000000 ; 7EA802FF 80000000 V_XOR_B32_e32 v82, v82, v84 ; 3AA4A952 V_SUB_F32_e32 v80, v82, v80 ; 08A0A152 V_ADD_F32_e32 v82, 2.000000e+00, v80 ; 06A4A0F4 V_MUL_F32_e32 v82, v82, v82 ; 10A4A552 V_MUL_F32_e32 v82, v82, v82 ; 10A4A552 V_MAD_F32 v84, v83, v83, v82, 0, 0 ; D2820054 054AA753 V_SUB_F32_e32 v81, 2.000000e+00, v81 ; 08A2A2F4 V_MUL_F32_e32 v81, v81, v81 ; 10A2A351 V_MAD_F32 v84, v81, v81, v84, 0, 0 ; D2820054 0552A351 V_SUB_F32_e32 v80, 2.000000e+00, v80 ; 08A0A0F4 V_MUL_F32_e32 v80, v80, v80 ; 10A0A150 V_MAD_F32 v84, v80, v80, v84, 0, 0 ; D2820054 0552A150 V_RCP_F32_e32 v84, v84 ; 7EA85554 V_MUL_F32_e32 v83, v83, v83 ; 10A6A753 V_MUL_F32_e32 v83, v84, v83 ; 10A6A754 V_RCP_F32_e32 v85, v88 ; 7EAA5558 V_MUL_F32_e32 v83, v85, v83 ; 10A6A755 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x0 ; C0C40700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[86:89], 15, 0, 0, 0, 0, 0, 0, 0, v[72:73], s[8:15], s[0:3] ; F0800F00 00025648 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v90, v87, v87 ; 06B4AF57 V_ADD_F32_e32 v90, -1.000000e+00, v90 ; 06B4B4F3 V_MUL_F32_e32 v91, v83, v90 ; 10B6B553 V_MAD_F32 v92, v83, v90, v91, 0, 0 ; D282005C 056EB553 V_MUL_F32_e32 v82, v84, v82 ; 10A4A554 V_MUL_F32_e32 v82, v85, v82 ; 10A4A555 V_MUL_F32_e32 v93, v82, v82 ; 10BAA552 IMAGE_SAMPLE v[94:97], 15, 0, 0, 0, 0, 0, 0, 0, v[63:64], s[8:15], s[0:3] ; F0800F00 00025E3F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v98, v93, v97 ; 10C4C35D V_MUL_F32_e32 v99, v83, v83 ; 10C6A753 V_MAD_F32 v98, v89, v99, v98, 0, 0 ; D2820062 058AC759 V_ADD_F32_e32 v100, v95, v95 ; 06C8BF5F V_ADD_F32_e32 v100, -1.000000e+00, v100 ; 06C8C8F3 V_MUL_F32_e32 v101, v82, v100 ; 10CAC952 V_MAD_F32 v98, v101, v92, v98, 0, 0 ; D2820062 058AB965 V_MUL_F32_e32 v81, v81, v81 ; 10A2A351 V_MUL_F32_e32 v81, v84, v81 ; 10A2A354 V_MUL_F32_e32 v81, v85, v81 ; 10A2A355 V_MUL_F32_e32 v102, v81, v81 ; 10CCA351 IMAGE_SAMPLE v[103:106], 15, 0, 0, 0, 0, 0, 0, 0, v[60:61], s[8:15], s[0:3] ; F0800F00 0002673C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v98, v106, v102, v98, 0, 0 ; D2820062 058ACD6A V_MAD_F32 v107, v104, -2.000000e+00, 1.000000e+00, 0, 0 ; D282006B 03C9EB68 V_MUL_F32_e32 v108, v81, v107 ; 10D8D751 V_MAD_F32 v98, v92, v108, v98, 0, 0 ; D2820062 058AD95C V_MAD_F32 v100, v82, v100, v101, 0, 0 ; D2820064 0596C952 V_MAD_F32 v98, v108, v100, v98, 0, 0 ; D2820062 058AC96C V_MUL_F32_e32 v80, v80, v80 ; 10A0A150 V_MUL_F32_e32 v80, v84, v80 ; 10A0A154 V_MUL_F32_e32 v80, v85, v80 ; 10A0A155 V_MUL_F32_e32 v84, v80, v80 ; 10A8A150 IMAGE_SAMPLE v[109:112], 15, 0, 0, 0, 0, 0, 0, 0, v[58:59], s[8:15], s[0:3] ; F0800F00 00026D3A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v85, v112, v84, v98, 0, 0 ; D2820055 058AA970 V_ADD_F32_e32 v98, v110, v110 ; 06C4DD6E V_SUB_F32_e32 v98, 1.000000e+00, v98 ; 08C4C4F2 V_MUL_F32_e32 v113, v80, v98 ; 10E2C550 V_MAD_F32 v85, v92, v113, v85, 0, 0 ; D2820055 0556E35C V_MAD_F32 v85, v100, v113, v85, 0, 0 ; D2820055 0556E364 V_MUL_F32_e32 v92, v108, v113 ; 10B8E36C V_MAD_F32 v92, v108, v113, v92, 0, 0 ; D282005C 0572E36C V_ADD_F32_e32 v85, v85, v92 ; 06AAB955 V_MAD_F32 v90, v83, v90, v101, 0, 0 ; D282005A 0596B553 V_MAD_F32 v90, v81, v107, v90, 0, 0 ; D282005A 056AD751 V_MAD_F32 v90, v80, v98, v90, 0, 0 ; D282005A 056AC550 V_MUL_F32_e32 v92, v90, v90 ; 10B8B55A V_SUB_F32_e32 v85, v85, v92 ; 08AAB955 V_RCP_F32_e32 v79, v79 ; 7E9E554F V_ADD_F32_e32 v92, v85, v79 ; 06B89F55 V_ADD_F32_e32 v98, v86, v86 ; 06C4AD56 V_ADD_F32_e32 v98, -1.000000e+00, v98 ; 06C4C4F3 V_MUL_F32_e32 v100, v83, v98 ; 10C8C553 V_MAD_F32 v107, v83, v98, v100, 0, 0 ; D282006B 0592C553 V_MUL_F32_e32 v114, v93, v96 ; 10E4C15D V_MAD_F32 v86, v88, v99, v114, 0, 0 ; D2820056 05CAC758 V_ADD_F32_e32 v87, v94, v94 ; 06AEBD5E V_SUB_F32_e32 v87, 1.000000e+00, v87 ; 08AEAEF2 V_MUL_F32_e32 v88, v82, v87 ; 10B0AF52 V_MAD_F32 v86, v88, v107, v86, 0, 0 ; D2820056 055AD758 V_MAD_F32 v86, v105, v102, v86, 0, 0 ; D2820056 055ACD69 V_MAD_F32 v89, v103, -2.000000e+00, 1.000000e+00, 0, 0 ; D2820059 03C9EB67 V_MUL_F32_e32 v94, v81, v89 ; 10BCB351 V_MAD_F32 v86, v107, v94, v86, 0, 0 ; D2820056 055ABD6B V_MAD_F32 v82, v82, v87, v88, 0, 0 ; D2820052 0562AF52 V_MAD_F32 v86, v94, v82, v86, 0, 0 ; D2820056 055AA55E V_MAD_F32 v86, v111, v84, v86, 0, 0 ; D2820056 055AA96F V_ADD_F32_e32 v87, v109, v109 ; 06AEDB6D V_ADD_F32_e32 v87, -1.000000e+00, v87 ; 06AEAEF3 V_MUL_F32_e32 v95, v80, v87 ; 10BEAF50 V_MAD_F32 v86, v107, v95, v86, 0, 0 ; D2820056 055ABF6B V_MAD_F32 v82, v82, v95, v86, 0, 0 ; D2820052 055ABF52 V_MUL_F32_e32 v86, v94, v95 ; 10ACBF5E V_MAD_F32 v86, v94, v95, v86, 0, 0 ; D2820056 055ABF5E V_ADD_F32_e32 v82, v82, v86 ; 06A4AD52 V_MAD_F32 v83, v83, v98, v88, 0, 0 ; D2820053 0562C553 V_MAD_F32 v81, v81, v89, v83, 0, 0 ; D2820051 054EB351 V_MAD_F32 v80, v80, v87, v81, 0, 0 ; D2820050 0546AF50 V_MUL_F32_e32 v81, v80, v80 ; 10A2A150 V_SUB_F32_e32 v81, v82, v81 ; 08A2A352 V_ADD_F32_e32 v82, v81, v79 ; 06A49F51 V_MUL_F32_e32 v83, v82, v92 ; 10A6B952 V_MUL_F32_e32 v86, v80, v90 ; 10ACB550 V_MUL_F32_e32 v87, v91, v88 ; 10AEB15B V_MAD_F32 v87, v100, v101, v87, 0, 0 ; D2820057 055ECB64 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v63, 1, 0, 0, 0, 0, 0, 0, 0, v[63:64], s[8:15], s[0:3] ; F0800100 00023F3F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v63, v63, -2.000000e+00, 1.000000e+00, 0, 0 ; D282003F 03C9EB3F V_MUL_F32_e32 v63, v93, v63 ; 107E7F5D IMAGE_SAMPLE v64, 1, 0, 0, 0, 0, 0, 0, 0, v[72:73], s[8:15], s[0:3] ; F0800100 00024048 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v64, v64, v64 ; 06808140 V_ADD_F32_e32 v64, -1.000000e+00, v64 ; 068080F3 V_MAD_F32 v63, v64, v99, v63, 0, 0 ; D282003F 04FEC740 V_ADD_F32_e32 v63, v63, v87 ; 067EAF3F IMAGE_SAMPLE v60, 1, 0, 0, 0, 0, 0, 0, 0, v[60:61], s[8:15], s[0:3] ; F0800100 00023C3C S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v60, v60, v60 ; 0678793C V_ADD_F32_e32 v60, -1.000000e+00, v60 ; 067878F3 V_MAD_F32 v60, v60, v102, v63, 0, 0 ; D282003C 04FECD3C V_MUL_F32_e32 v61, v91, v94 ; 107ABD5B V_MAD_F32 v61, v100, v108, v61, 0, 0 ; D282003D 04F6D964 V_ADD_F32_e32 v60, v60, v61 ; 06787B3C V_MUL_F32_e32 v61, v101, v94 ; 107ABD65 V_MAD_F32 v61, v88, v108, v61, 0, 0 ; D282003D 04F6D958 V_ADD_F32_e32 v60, v60, v61 ; 06787B3C IMAGE_SAMPLE v58, 1, 0, 0, 0, 0, 0, 0, 0, v[58:59], s[8:15], s[0:3] ; F0800100 00023A3A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v58, v58, -2.000000e+00, 1.000000e+00, 0, 0 ; D282003A 03C9EB3A V_MAD_F32 v58, v58, v84, v60, 0, 0 ; D282003A 04F2A93A V_MUL_F32_e32 v59, v91, v95 ; 1076BF5B V_MAD_F32 v59, v100, v113, v59, 0, 0 ; D282003B 04EEE364 V_ADD_F32_e32 v58, v59, v58 ; 0674753B V_MUL_F32_e32 v59, v101, v95 ; 1076BF65 V_MAD_F32 v59, v88, v113, v59, 0, 0 ; D282003B 04EEE358 V_ADD_F32_e32 v58, v58, v59 ; 0674773A V_MUL_F32_e32 v59, v108, v95 ; 1076BF6C V_MAD_F32 v59, v94, v113, v59, 0, 0 ; D282003B 04EEE35E V_ADD_F32_e32 v58, v58, v59 ; 0674773A V_SUB_F32_e32 v58, v58, v86 ; 0874AD3A V_MUL_F32_e32 v59, v58, v58 ; 1076753A V_SUB_F32_e32 v60, v83, v59 ; 08787753 V_MOV_B32_e32 v61, 1.000000e-07 ; 7E7A02FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v60, v61 ; 7C0C7B3C V_CMP_U_F32_e64 s[0:1], v60, v60, 0, 0 ; D0100000 0002793C V_CNDMASK_B32_e64 v63, 0, -1, vcc, 0, 0, 0, 0 ; D200003F 01A98280 V_CNDMASK_B32_e64 v64, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000040 00018280 V_OR_B32_e32 v63, v63, v64 ; 387E813F V_MOV_B32_e32 v64, 0x33d6bf95 ; 7E8002FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v63, 0, 0, 0 ; D10A0000 0001013F V_CNDMASK_B32_e64 v63, v64, v60, s[0:1], 0, 0, 0, 0 ; D200003F 00027940 V_RSQ_CLAMP_F32_e32 v63, v63 ; 7E7E593F V_RCP_F32_e32 v72, v60 ; 7E90553C V_ADD_F32_e32 v58, v58, v58 ; 0674753A V_SUB_F32_e32 v73, v22, v9 ; 08921316 V_SUB_F32_e32 v83, v21, v10 ; 08A61515 V_MUL_F32_e32 v84, v83, v83 ; 10A8A753 V_MAD_F32 v84, v73, v73, v84, 0, 0 ; D2820054 05529349 V_SUB_F32_e32 v86, v18, v8 ; 08AC1112 V_MAD_F32 v84, v86, v86, v84, 0, 0 ; D2820054 0552AD56 V_CMP_GE_F32_e32 vcc, v84, v61 ; 7C0C7B54 V_CMP_U_F32_e64 s[0:1], v84, v84, 0, 0 ; D0100000 0002A954 V_CNDMASK_B32_e64 v87, 0, -1, vcc, 0, 0, 0, 0 ; D2000057 01A98280 V_CNDMASK_B32_e64 v88, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000058 00018280 V_OR_B32_e32 v87, v87, v88 ; 38AEB157 V_CMP_NE_I32_e64 s[0:1], v87, 0, 0, 0 ; D10A0000 00010157 V_CNDMASK_B32_e64 v84, v64, v84, s[0:1], 0, 0, 0, 0 ; D2000054 0002A940 V_RSQ_CLAMP_F32_e32 v84, v84 ; 7EA85954 V_MUL_F32_e32 v73, v84, v73 ; 10929354 V_SUB_F32_e32 v87, v73, v78 ; 08AE9D49 V_MAD_F32 v78, v75, v87, v78, 0, 0 ; D282004E 053AAF4B V_MUL_F32_e32 v83, v84, v83 ; 10A6A754 V_SUB_F32_e32 v87, v83, v77 ; 08AE9B53 V_MAD_F32 v77, v75, v87, v77, 0, 0 ; D282004D 0536AF4B V_MUL_F32_e32 v87, v77, v77 ; 10AE9B4D V_MAD_F32 v87, v78, v78, v87, 0, 0 ; D2820057 055E9D4E V_MUL_F32_e32 v88, v84, v86 ; 10B0AD54 V_SUB_F32_e32 v89, v88, v76 ; 08B29958 V_MAD_F32 v75, v75, v89, v76, 0, 0 ; D282004B 0532B34B V_MAD_F32 v76, v75, v75, v87, 0, 0 ; D282004C 055E974B V_CMP_GE_F32_e32 vcc, v76, v61 ; 7C0C7B4C V_CMP_U_F32_e64 s[0:1], v76, v76, 0, 0 ; D0100000 0002994C V_CNDMASK_B32_e64 v87, 0, -1, vcc, 0, 0, 0, 0 ; D2000057 01A98280 V_CNDMASK_B32_e64 v89, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000059 00018280 V_OR_B32_e32 v87, v87, v89 ; 38AEB357 V_CMP_NE_I32_e64 s[0:1], v87, 0, 0, 0 ; D10A0000 00010157 V_CNDMASK_B32_e64 v76, v64, v76, s[0:1], 0, 0, 0, 0 ; D200004C 00029940 V_RSQ_CLAMP_F32_e32 v76, v76 ; 7E98594C V_MAD_F32 v77, v77, v76, v56, 0, 0 ; D282004D 04E2994D V_MAD_F32 v75, v75, v76, v54, 0, 0 ; D282004B 04DA994B V_RCP_F32_e32 v75, v75 ; 7E96554B V_MUL_F32_e32 v77, v77, v75 ; 109A974D V_SUB_F32_e32 v77, v77, v80 ; 089AA14D V_MUL_F32_e32 v87, v77, v58 ; 10AE754D V_MAD_F32 v76, v78, v76, v55, 0, 0 ; D282004C 04DE994E V_MUL_F32_e32 v75, v76, v75 ; 1096974C V_SUB_F32_e32 v75, v75, v90 ; 0896B54B V_MUL_F32_e32 v76, v82, v75 ; 10989752 V_SUB_F32_e32 v76, v76, v87 ; 0898AF4C V_MUL_F32_e32 v76, v75, v76 ; 1098994B V_MUL_F32_e32 v77, v77, v77 ; 109A9B4D V_MAD_F32 v76, v77, v92, v76, 0, 0 ; D282004C 0532B94D V_MUL_F32_e32 v76, 5.000000e-01, v76 ; 109898F0 V_MUL_F32_e32 v78, v72, v76 ; 109C9948 V_MUL_F32_e32 v78, -1.442700e+00, v78 ; 109C9CFF BFB8AA65 V_EXP_F32_e32 v78, v78 ; 7E9C4B4E V_MUL_F32_e32 v63, v78, v63 ; 107E7F4E V_MUL_F32_e32 v78, v63, v57 ; 109C733F V_MOV_B32_e32 v82, -1.600000e+01 ; 7EA402FF C1800000 V_MAD_F32 v72, v72, v76, v82, 0, 0 ; D2820048 054A9948 V_CMP_GE_F32_e64 s[0:1], v72, 0.000000e+00, 0, 0 ; D00C0000 00010148 V_CNDMASK_B32_e64 v72, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000048 0001E480 V_MOV_B32_e32 v76, 0x80000000 ; 7E9802FF 80000000 V_XOR_B32_e32 v60, v60, v76 ; 3A78993C V_CMP_GE_F32_e64 s[0:1], v60, 0.000000e+00, 0, 0 ; D00C0000 0001013C V_CNDMASK_B32_e64 v60, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003C 0001E480 V_ADD_F32_e32 v60, v72, v60 ; 06787948 V_XOR_B32_e32 v60, v60, v76 ; 3A78993C V_CMP_GE_F32_e64 s[2:3], v60, 0.000000e+00, 0, 0 ; D00C0002 0001013C V_CNDMASK_B32_e64 v60, 0, v78, s[2:3], 0, 0, 0, 0 ; D200003C 000A9C80 V_RCP_F32_e32 v72, v74 ; 7E90554A V_ADD_F32_e32 v74, v72, v85 ; 0694AB48 V_ADD_F32_e32 v72, v72, v81 ; 0690A348 V_MUL_F32_e32 v76, v72, v74 ; 10989548 V_SUB_F32_e32 v76, v76, v59 ; 0898774C V_CMP_GE_F32_e32 vcc, v76, v61 ; 7C0C7B4C V_CMP_U_F32_e64 s[0:1], v76, v76, 0, 0 ; D0100000 0002994C V_CNDMASK_B32_e64 v78, 0, -1, vcc, 0, 0, 0, 0 ; D200004E 01A98280 V_CNDMASK_B32_e64 v89, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000059 00018280 V_OR_B32_e32 v78, v78, v89 ; 389CB34E V_CMP_NE_I32_e64 s[0:1], v78, 0, 0, 0 ; D10A0000 0001014E V_CNDMASK_B32_e64 v78, v64, v76, s[0:1], 0, 0, 0, 0 ; D200004E 00029940 V_RSQ_CLAMP_F32_e32 v78, v78 ; 7E9C594E V_RCP_F32_e32 v89, v76 ; 7EB2554C V_SUB_F32_e32 v91, v73, v71 ; 08B68F49 V_MAD_F32 v71, v65, v91, v71, 0, 0 ; D2820047 051EB741 V_SUB_F32_e32 v91, v83, v69 ; 08B68B53 V_MAD_F32 v69, v65, v91, v69, 0, 0 ; D2820045 0516B741 V_MUL_F32_e32 v91, v69, v69 ; 10B68B45 V_MAD_F32 v91, v71, v71, v91, 0, 0 ; D282005B 056E8F47 V_SUB_F32_e32 v88, v88, v67 ; 08B08758 V_MAD_F32 v65, v65, v88, v67, 0, 0 ; D2820041 050EB141 V_MAD_F32 v67, v65, v65, v91, 0, 0 ; D2820043 056E8341 V_CMP_GE_F32_e32 vcc, v67, v61 ; 7C0C7B43 V_CMP_U_F32_e64 s[0:1], v67, v67, 0, 0 ; D0100000 00028743 V_CNDMASK_B32_e64 v88, 0, -1, vcc, 0, 0, 0, 0 ; D2000058 01A98280 V_CNDMASK_B32_e64 v91, 0, -1, s[0:1], 0, 0, 0, 0 ; D200005B 00018280 V_OR_B32_e32 v88, v88, v91 ; 38B0B758 V_CMP_NE_I32_e64 s[0:1], v88, 0, 0, 0 ; D10A0000 00010158 V_CNDMASK_B32_e64 v67, v64, v67, s[0:1], 0, 0, 0, 0 ; D2000043 00028740 V_RSQ_CLAMP_F32_e32 v67, v67 ; 7E865943 V_MAD_F32 v69, v69, v67, v70, 0, 0 ; D2820045 051A8745 V_MAD_F32 v65, v65, v67, v68, 0, 0 ; D2820041 05128741 V_RCP_F32_e32 v65, v65 ; 7E825541 V_MUL_F32_e32 v68, v69, v65 ; 10888345 V_SUB_F32_e32 v68, v68, v80 ; 0888A144 V_MUL_F32_e32 v58, v58, v68 ; 1074893A V_MAD_F32 v66, v71, v67, v66, 0, 0 ; D2820042 050A8747 V_MUL_F32_e32 v65, v66, v65 ; 10828342 V_SUB_F32_e32 v65, v65, v90 ; 0882B541 V_MUL_F32_e32 v66, v72, v65 ; 10848348 V_SUB_F32_e32 v58, v66, v58 ; 08747542 V_MUL_F32_e32 v58, v65, v58 ; 10747541 V_MUL_F32_e32 v65, v68, v68 ; 10828944 V_MAD_F32 v58, v65, v74, v58, 0, 0 ; D282003A 04EA9541 V_MUL_F32_e32 v58, 5.000000e-01, v58 ; 107474F0 V_MUL_F32_e32 v65, v58, v89 ; 1082B33A V_MUL_F32_e32 v66, -1.442700e+00, v65 ; 108482FF BFB8AA65 V_EXP_F32_e32 v66, v66 ; 7E844B42 V_MUL_F32_e32 v66, v78, v66 ; 1084854E V_MUL_F32_e32 v62, v66, v62 ; 107C7D42 V_MAD_F32 v58, v58, v89, v82, 0, 0 ; D282003A 054AB33A V_CMP_GE_F32_e64 s[0:1], v58, 0.000000e+00, 0, 0 ; D00C0000 0001013A V_CNDMASK_B32_e64 v58, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003A 0001E480 V_MOV_B32_e32 v67, 0x80000000 ; 7E8602FF 80000000 V_XOR_B32_e32 v68, v76, v67 ; 3A88874C V_CMP_GE_F32_e64 s[0:1], v68, 0.000000e+00, 0, 0 ; D00C0000 00010144 V_CNDMASK_B32_e64 v68, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000044 0001E480 V_ADD_F32_e32 v58, v58, v68 ; 0674893A V_XOR_B32_e32 v58, v58, v67 ; 3A74873A V_CMP_GE_F32_e64 s[8:9], v58, 0.000000e+00, 0, 0 ; D00C0008 0001013A V_CNDMASK_B32_e64 v58, 0, v62, s[8:9], 0, 0, 0, 0 ; D200003A 00227C80 V_ADD_F32_e32 v58, v58, v60 ; 0674793A V_MUL_F32_e32 v60, v80, v83 ; 1078A750 V_MAD_F32 v60, v90, v73, v60, 0, 0 ; D282003C 04F2935A V_MAD_F32 v60, v84, v86, v60, 0, 0 ; D282003C 04F2AD54 V_SUB_F32_e32 v60, 1.000000e+00, v60 ; 087878F2 V_MUL_F32_e32 v62, v60, v60 ; 107C793C V_MUL_F32_e32 v62, v62, v62 ; 107C7D3E V_MUL_F32_e32 v60, v60, v62 ; 10787D3C S_LOAD_DWORDX4 s[12:15], s[4:5], 0x14 ; C0860514 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x28 ; C0C80728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v23, 8, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800800 00641717 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v23, 4.000000e+00, v23 ; 102E2EF6 V_ADD_F32_e64 v23, v23, 0, 1, 0 ; D2060817 00010117 V_SUB_F32_e32 v67, 1.000000e+00, v23 ; 08862EF2 V_MOV_B32_e32 v68, 0 ; 7E880280 V_MOV_B32_e32 v69, v68 ; 7E8A0344 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x10 ; C0860510 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x20 ; C0C80720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[67:70], 15, 0, 0, 0, 0, 0, 0, 0, v[67:70], s[16:23], s[12:15] ; F0900F00 00644343 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v24, v69 ; 7E304F45 V_MUL_LEGACY_F32_e32 v24, 2.200000e+00, v24 ; 0E3030FF 400CCCCD V_EXP_F32_e32 v24, v24 ; 7E304B18 V_MAD_F32 v58, v58, v60, v24, 0, 0 ; D282003A 0462793A V_MOV_B32_e32 v62, 1.600000e+01 ; 7E7C02FF 41800000 V_MAD_F32 v71, v79, v62, v85, 0, 0 ; D2820047 05567D4F V_MAD_F32 v62, v79, v62, v81, 0, 0 ; D282003E 05467D4F V_MUL_F32_e32 v72, v62, v71 ; 10908F3E V_SUB_F32_e32 v59, v72, v59 ; 08767748 V_CMP_GE_F32_e32 vcc, v59, v61 ; 7C0C7B3B V_CMP_U_F32_e64 s[0:1], v59, v59, 0, 0 ; D0100000 0002773B V_CNDMASK_B32_e64 v61, 0, -1, vcc, 0, 0, 0, 0 ; D200003D 01A98280 V_CNDMASK_B32_e64 v72, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000048 00018280 V_OR_B32_e32 v61, v61, v72 ; 387A913D V_CMP_NE_I32_e64 s[0:1], v61, 0, 0, 0 ; D10A0000 0001013D V_CNDMASK_B32_e64 v61, v64, v59, s[0:1], 0, 0, 0, 0 ; D200003D 00027740 V_RSQ_CLAMP_F32_e32 v61, v61 ; 7E7A593D V_RCP_F32_e32 v64, v59 ; 7E80553B V_MUL_F32_e32 v62, v62, v75 ; 107C973E V_SUB_F32_e32 v62, v62, v87 ; 087CAF3E V_MUL_F32_e32 v62, v75, v62 ; 107C7D4B V_MAD_F32 v62, v77, v71, v62, 0, 0 ; D282003E 04FA8F4D V_MUL_F32_e32 v62, 5.000000e-01, v62 ; 107C7CF0 V_MUL_F32_e32 v71, v64, v62 ; 108E7D40 V_MUL_F32_e32 v71, -1.442700e+00, v71 ; 108E8EFF BFB8AA65 V_EXP_F32_e32 v71, v71 ; 7E8E4B47 V_MUL_F32_e32 v61, v61, v71 ; 107A8F3D V_MUL_F32_e32 v61, 5.000000e-03, v61 ; 107A7AFF 3BA3D70A V_MUL_F32_e32 v57, v24, v57 ; 10727318 V_MUL_F32_e32 v57, v61, v57 ; 1072733D V_MAD_F32 v62, v64, v62, v82, 0, 0 ; D282003E 054A7D40 V_CMP_GE_F32_e64 s[0:1], v62, 0.000000e+00, 0, 0 ; D00C0000 0001013E V_CNDMASK_B32_e64 v62, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003E 0001E480 V_MOV_B32_e32 v64, 0x80000000 ; 7E8002FF 80000000 V_XOR_B32_e32 v59, v59, v64 ; 3A76813B V_CMP_GE_F32_e64 s[0:1], v59, 0.000000e+00, 0, 0 ; D00C0000 0001013B V_CNDMASK_B32_e64 v59, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003B 0001E480 V_ADD_F32_e32 v59, v62, v59 ; 0676773E V_XOR_B32_e32 v59, v59, v64 ; 3A76813B V_CMP_GE_F32_e64 s[0:1], v59, 0.000000e+00, 0, 0 ; D00C0000 0001013B V_CNDMASK_B32_e64 v57, 0, v57, s[0:1], 0, 0, 0, 0 ; D2000039 00027280 V_MUL_F32_e32 v56, v56, v80 ; 1070A138 V_MAD_F32 v55, v55, v90, v56, 0, 0 ; D2820037 04E2B537 V_ADD_F32_e32 v54, v55, v54 ; 066C6D37 V_ADD_F32_e64 v54, v54, 0, 1, 0 ; D2060836 00010136 V_MAD_F32 v55, v58, v54, v57, 0, 0 ; D2820037 04E66D3A V_MUL_F32_e32 v52, v52, v9 ; 10681334 V_MAD_F32 v52, v10, v53, v52, 0, 0 ; D2820034 04D26B0A V_MAD_F32 v50, v8, v50, v52, 0, 0 ; D2820032 04D26508 V_ADD_F32_e32 v48, v50, v48 ; 06606132 V_MUL_F32_e32 v49, v49, v9 ; 10621331 V_MAD_F32 v49, v10, v51, v49, 0, 0 ; D2820031 04C6670A V_MAD_F32 v47, v8, v47, v49, 0, 0 ; D282002F 04C65F08 V_ADD_F32_e32 v45, v47, v45 ; 065A5B2F V_RCP_F32_e32 v45, v45 ; 7E5A552D V_MUL_F32_e32 v47, v45, v48 ; 105E612D V_MAD_F32 v47, v47, -5.000000e-01, -5.000000e-01, 0, 0 ; D282002F 03C5E32F V_MOV_B32_e32 v48, 6.000000e+00 ; 7E6002FF 40C00000 V_MAD_F32 v47, v85, v48, v47, 0, 0 ; D282002F 04BE6155 V_MAD_F32 v50, v76, -2.000000e+00, v47, 0, 0 ; D2820032 04BDEB4C V_ADD_F32_e32 v51, v76, v76 ; 0666994C V_MUL_F32_e32 v44, v44, v9 ; 1058132C V_MAD_F32 v44, v10, v46, v44, 0, 0 ; D282002C 04B25D0A V_MAD_F32 v43, v8, v43, v44, 0, 0 ; D282002B 04B25708 V_ADD_F32_e32 v42, v43, v42 ; 0654552B V_MUL_F32_e32 v42, v45, v42 ; 1054552D V_MAD_F32 v42, v42, 5.000000e-01, -5.000000e-01, 0, 0 ; D282002A 03C5E12A V_MAD_F32 v42, v81, v48, v42, 0, 0 ; D282002A 04AA6151 V_ADD_F32_e32 v49, v51, v42 ; 06625533 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x8 ; C0860508 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x10 ; C0C80710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[43:46], 15, 0, 0, 0, 0, 0, 0, 0, v[49:50], s[16:23], s[12:15] ; F0800F00 00642B31 V_MOV_B32_e32 v52, 2.500000e-01 ; 7E6802FF 3E800000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v53, v45, v52 ; 106A692D V_ADD_F32_e32 v51, v51, v47 ; 06665F33 V_MOV_B32_e32 v56, v49 ; 7E700331 V_MOV_B32_e32 v57, v50 ; 7E720332 V_MOV_B32_e32 v57, v51 ; 7E720333 IMAGE_SAMPLE v[56:59], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[16:23], s[12:15] ; F0800F00 00643838 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v51, v58, v52, v53, 0, 0 ; D2820033 04D6693A V_MAD_F32 v48, v76, v48, v47, 0, 0 ; D2820030 04BE614C V_MAD_F32 v47, v76, -2.000000e+00, v42, 0, 0 ; D282002F 04A9EB4C IMAGE_SAMPLE v[71:74], 15, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[16:23], s[12:15] ; F0800F00 0064472F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v42, v73, v52, v51, 0, 0 ; D282002A 04CE6949 V_MOV_B32_e32 v48, v50 ; 7E600332 IMAGE_SAMPLE v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[16:23], s[12:15] ; F0800F00 00642F2F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v42, v49, v52, v42, 0, 0 ; D282002A 04AA6931 V_MOV_B32_e32 v51, 0x7fffffff ; 7E6602FF 7FFFFFFF V_AND_B32_e32 v42, v42, v51 ; 3654672A V_LOG_F32_e32 v42, v42 ; 7E544F2A V_MUL_F32_e32 v42, 2.200000e+00, v42 ; 105454FF 400CCCCD V_EXP_F32_e32 v42, v42 ; 7E544B2A V_MAD_F32 v42, v42, v17, v55, 0, 0 ; D282002A 04DE232A V_MUL_F32_e32 v51, v65, v65 ; 10668341 V_MUL_F32_e32 v51, v65, v51 ; 10666741 V_MUL_F32_e32 v23, v23, v51 ; 102E6717 V_MAD_F32 v24, v23, v24, v42, 0, 0 ; D2820018 04AA3117 V_MUL_F32_e32 v40, v40, v9 ; 10501328 V_MAD_F32 v40, v10, v41, v40, 0, 0 ; D2820028 04A2530A V_MAD_F32 v38, v8, v38, v40, 0, 0 ; D2820026 04A24D08 V_ADD_F32_e32 v36, v38, v36 ; 06484926 V_MUL_F32_e32 v37, v37, v9 ; 104A1325 V_MAD_F32 v37, v10, v39, v37, 0, 0 ; D2820025 04964F0A V_MAD_F32 v34, v8, v34, v37, 0, 0 ; D2820022 04964508 V_ADD_F32_e32 v32, v34, v32 ; 06404122 V_RCP_F32_e32 v32, v32 ; 7E405520 V_MUL_F32_e32 v34, v32, v36 ; 10444920 V_MAD_F32 v36, v34, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820024 03C5E122 V_MUL_F32_e32 v33, v33, v9 ; 10421321 V_MAD_F32 v33, v10, v35, v33, 0, 0 ; D2820021 0486470A V_MAD_F32 v30, v8, v30, v33, 0, 0 ; D282001E 04863D08 V_ADD_F32_e32 v28, v30, v28 ; 0638391E V_MUL_F32_e32 v28, v32, v28 ; 10383920 V_MAD_F32 v28, v28, -5.000000e-01, -5.000000e-01, 0, 0 ; D282001C 03C5E31C V_SUB_F32_e32 v37, 1.000000e+00, v28 ; 084A38F2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x28 ; C0860528 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x50 ; C0C80750 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v28, 1, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[16:23], s[12:15] ; F0800100 00641C24 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v28, v28, 0, 1, 0 ; D206081C 0001011C V_MUL_F32_e32 v27, v28, v27 ; 1036371C V_MUL_F32_e32 v28, v27, v24 ; 1038311B V_MOV_B32_e32 v30, -6.700000e-01 ; 7E3C02FF BF2B851F V_MAD_F32 v24, v28, v30, v24, 0, 0 ; D2820018 04623D1C V_MOV_B32_e32 v28, 0x7fffffff ; 7E3802FF 7FFFFFFF V_AND_B32_e32 v24, v24, v28 ; 36303918 V_LOG_F32_e32 v24, v24 ; 7E304F18 V_MUL_F32_e32 v24, 4.545450e-01, v24 ; 103030FF 3EE8BA1F V_EXP_F32_e32 v24, v24 ; 7E304B18 V_SUB_F32_e32 v12, 1.000000e+00, v13 ; 08181AF2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x1c ; C086051C S_LOAD_DWORDX8 s[16:23], s[6:7], 0x38 ; C0C80738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800F00 0064200B S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v11, 1.000000e+00, v35 ; 081646F2 V_MAD_F32 v12, v24, v11, v34, 0, 0 ; D282000C 048A1718 V_ADD_F32_e32 v12, -4.000000e-01, v12 ; 061818FF BECCCCCD V_MOV_B32_e32 v13, 4.000000e-01 ; 7E1A02FF 3ECCCCCD V_MAD_F32 v38, v2, v12, v13, 0, 0 ; D2820026 04361902 V_MUL_F32_e32 v12, v63, v29 ; 10183B3F V_CNDMASK_B32_e64 v12, 0, v12, s[2:3], 0, 0, 0, 0 ; D200000C 000A1880 V_MUL_F32_e32 v24, v66, v31 ; 10303F42 V_CNDMASK_B32_e64 v24, 0, v24, s[8:9], 0, 0, 0, 0 ; D2000018 00223080 V_ADD_F32_e32 v12, v24, v12 ; 06181918 V_LOG_F32_e32 v24, v68 ; 7E304F44 V_MUL_LEGACY_F32_e32 v24, 2.200000e+00, v24 ; 0E3030FF 400CCCCD V_EXP_F32_e32 v24, v24 ; 7E304B18 V_MAD_F32 v12, v12, v60, v24, 0, 0 ; D282000C 0462790C V_MUL_F32_e32 v28, v24, v29 ; 10383B18 V_MUL_F32_e32 v28, v61, v28 ; 1038393D V_CNDMASK_B32_e64 v28, 0, v28, s[0:1], 0, 0, 0, 0 ; D200001C 00023880 V_MAD_F32 v12, v12, v54, v28, 0, 0 ; D282000C 04726D0C V_MUL_F32_e32 v28, v44, v52 ; 1038692C V_MAD_F32 v28, v57, v52, v28, 0, 0 ; D282001C 04726939 V_MAD_F32 v28, v72, v52, v28, 0, 0 ; D282001C 04726948 V_MAD_F32 v28, v48, v52, v28, 0, 0 ; D282001C 04726930 V_MOV_B32_e32 v29, 0x7fffffff ; 7E3A02FF 7FFFFFFF V_AND_B32_e32 v28, v28, v29 ; 36383B1C V_LOG_F32_e32 v28, v28 ; 7E384F1C V_MUL_F32_e32 v28, 2.200000e+00, v28 ; 103838FF 400CCCCD V_EXP_F32_e32 v28, v28 ; 7E384B1C V_MAD_F32 v12, v28, v17, v12, 0, 0 ; D282000C 0432231C V_MAD_F32 v12, v23, v24, v12, 0, 0 ; D282000C 04323117 V_MUL_F32_e32 v24, v27, v12 ; 1030191B V_MAD_F32 v12, v24, v30, v12, 0, 0 ; D282000C 04323D18 V_MOV_B32_e32 v24, 0x7fffffff ; 7E3002FF 7FFFFFFF V_AND_B32_e32 v12, v12, v24 ; 3618310C V_LOG_F32_e32 v12, v12 ; 7E184F0C V_MUL_F32_e32 v12, 4.545450e-01, v12 ; 101818FF 3EE8BA1F V_EXP_F32_e32 v12, v12 ; 7E184B0C V_MAD_F32 v12, v12, v11, v33, 0, 0 ; D282000C 0486170C V_ADD_F32_e32 v12, -4.000000e-01, v12 ; 061818FF BECCCCCD V_MAD_F32 v37, v2, v12, v13, 0, 0 ; D2820025 04361902 V_MUL_F32_e32 v12, v63, v25 ; 1018333F V_CNDMASK_B32_e64 v12, 0, v12, s[2:3], 0, 0, 0, 0 ; D200000C 000A1880 V_MUL_F32_e32 v24, v66, v26 ; 10303542 V_CNDMASK_B32_e64 v24, 0, v24, s[8:9], 0, 0, 0, 0 ; D2000018 00223080 V_ADD_F32_e32 v12, v24, v12 ; 06181918 V_LOG_F32_e32 v24, v67 ; 7E304F43 V_MUL_LEGACY_F32_e32 v24, 2.200000e+00, v24 ; 0E3030FF 400CCCCD V_EXP_F32_e32 v24, v24 ; 7E304B18 V_MAD_F32 v12, v12, v60, v24, 0, 0 ; D282000C 0462790C V_MUL_F32_e32 v25, v24, v25 ; 10323318 V_MUL_F32_e32 v25, v61, v25 ; 1032333D V_CNDMASK_B32_e64 v25, 0, v25, s[0:1], 0, 0, 0, 0 ; D2000019 00023280 V_MAD_F32 v12, v12, v54, v25, 0, 0 ; D282000C 04666D0C V_MUL_F32_e32 v25, v43, v52 ; 1032692B V_MAD_F32 v25, v56, v52, v25, 0, 0 ; D2820019 04666938 V_MAD_F32 v25, v71, v52, v25, 0, 0 ; D2820019 04666947 V_MAD_F32 v25, v47, v52, v25, 0, 0 ; D2820019 0466692F V_MOV_B32_e32 v26, 0x7fffffff ; 7E3402FF 7FFFFFFF V_AND_B32_e32 v25, v25, v26 ; 36323519 V_LOG_F32_e32 v25, v25 ; 7E324F19 V_MUL_F32_e32 v25, 2.200000e+00, v25 ; 103232FF 400CCCCD V_EXP_F32_e32 v25, v25 ; 7E324B19 V_MAD_F32 v12, v25, v17, v12, 0, 0 ; D282000C 04322319 V_MAD_F32 v12, v23, v24, v12, 0, 0 ; D282000C 04323117 V_MUL_F32_e32 v17, v27, v12 ; 1022191B V_MAD_F32 v12, v17, v30, v12, 0, 0 ; D282000C 04323D11 V_MOV_B32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF V_AND_B32_e32 v12, v12, v17 ; 3618230C V_LOG_F32_e32 v12, v12 ; 7E184F0C V_MUL_F32_e32 v12, 4.545450e-01, v12 ; 101818FF 3EE8BA1F V_EXP_F32_e32 v12, v12 ; 7E184B0C V_MAD_F32 v11, v12, v11, v32, 0, 0 ; D282000B 0482170C V_ADD_F32_e32 v11, -4.000000e-01, v11 ; 061616FF BECCCCCD V_MAD_F32 v36, v2, v11, v13, 0, 0 ; D2820024 04361702 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x24 ; C0800524 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x48 ; C0C40748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[8:15], s[0:3] ; F0800700 00020B24 V_MUL_F32_e32 v17, v4, v2 ; 10220504 V_SUB_F32_e32 v2, v2, v17 ; 08042302 V_SUB_F32_e32 v3, 1.000000e+00, v2 ; 080604F2 V_MUL_F32_e32 v4, v3, v37 ; 10084B03 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v2, v12, v4, 0, 0 ; D2820004 04121902 V_SUB_F32_e32 v5, v14, v4 ; 080A090E V_SUB_F32_e32 v9, v9, v22 ; 08122D09 V_SUB_F32_e32 v10, v10, v21 ; 08142B0A V_MUL_F32_e32 v10, v10, v10 ; 1014150A V_MAD_F32 v9, v9, v9, v10, 0, 0 ; D2820009 042A1309 V_SUB_F32_e32 v8, v8, v18 ; 08102508 V_MAD_F32 v9, v8, v8, v9, 0, 0 ; D2820009 04261108 V_MUL_F32_e32 v9, v9, v20 ; 10122909 V_MUL_F32_e32 v10, v8, v19 ; 10142708 V_MUL_F32_e32 v10, 1.442700e+00, v10 ; 101414FF 3FB8AA65 V_EXP_F32_e32 v10, v10 ; 7E144B0A V_SUB_F32_e32 v10, 1.000000e+00, v10 ; 081414F2 V_MUL_F32_e32 v9, v10, v9 ; 1012130A V_RCP_F32_e32 v8, v8 ; 7E105508 V_MUL_F32_e32 v8, v8, v9 ; 10101308 V_MUL_F32_e32 v8, 1.442700e+00, v8 ; 101010FF 3FB8AA65 V_EXP_F32_e32 v8, v8 ; 7E104B08 V_ADD_F32_e64 v8, v8, 0, 1, 0 ; D2060808 00010108 V_SUB_F32_e32 v8, 1.000000e+00, v8 ; 081010F2 V_RCP_F32_e32 v6, v6 ; 7E0C5506 V_MUL_F32_e32 v6, v6, v7 ; 100C0F06 V_MAD_F32 v15, v6, 5.000000e-01, 5.000000e-01, 0, 0 ; D282000F 03C1E106 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x30 ; C0C20730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v6, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[4:11], s[0:3] ; F0800100 0001060F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v6, v8, v6 ; 100C0D08 V_MAD_F32 v4, v6, v5, v4, 0, 0 ; D2820004 04120B06 V_MUL_F32_e32 v5, v3, v36 ; 100A4903 V_MAD_F32 v5, v2, v11, v5, 0, 0 ; D2820005 04161702 V_SUB_F32_e32 v1, v1, v5 ; 08020B01 V_MAD_F32 v1, v6, v1, v5, 0, 0 ; D2820001 04160306 V_CVT_PKRTZ_F16_F32_e32 v1, v1, v4 ; 5E020901 V_MUL_F32_e32 v3, v3, v38 ; 10064D03 V_MAD_F32 v2, v2, v13, v3, 0, 0 ; D2820002 040E1B02 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_MAD_F32 v0, v6, v0, v2, 0, 0 ; D2820000 040A0106 V_MUL_F32_e32 v2, v46, v52 ; 1004692E V_MAD_F32 v2, v59, v52, v2, 0, 0 ; D2820002 040A693B V_MAD_F32 v2, v74, v52, v2, 0, 0 ; D2820002 040A694A V_MAD_F32 v2, v50, v52, v2, 0, 0 ; D2820002 040A6932 V_LOG_F32_e32 v3, v70 ; 7E064F46 V_MUL_LEGACY_F32_e32 v3, 1.000000e+00, v3 ; 0E0606F2 V_EXP_F32_e32 v3, v3 ; 7E064B03 V_MAD_F32 v2, v2, v3, v3, 0, 0 ; D2820002 040E0702 V_ADD_F32_e64 v2, v2, 0, 1, 0 ; D2060802 00010102 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..13] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MUL TEMP[0].xy, CONST[13], IN[1] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1], CONST[12], IN[1].xyxy 3: MUL TEMP[1], TEMP[1], CONST[13].xyxy 4: MUL TEMP[2].xy, TEMP[1], CONST[13].zzzz 5: MOV TEMP[2].xy, TEMP[2].xyxx 6: MUL TEMP[3].xy, TEMP[1].zwzw, CONST[13].wwww 7: MOV TEMP[3].xy, TEMP[3].xyxx 8: MUL TEMP[4].xyz, CONST[1], IN[0].yyyy 9: MOV TEMP[1].xyz, TEMP[4].xyzx 10: MAD TEMP[4].xyz, IN[0].xxxx, CONST[0], TEMP[1] 11: MOV TEMP[1].xyz, TEMP[4].xyzx 12: MAD TEMP[4].xyz, IN[0].zzzz, CONST[2], TEMP[1] 13: MOV TEMP[1].xyz, TEMP[4].xyzx 14: ADD TEMP[4].xyz, TEMP[1], CONST[3] 15: MUL TEMP[5].xy, TEMP[4].yyyy, CONST[9] 16: MOV TEMP[5].xy, TEMP[5].xyxx 17: MAD TEMP[6].xy, TEMP[4].xxxx, CONST[8], TEMP[5] 18: MOV TEMP[5].xy, TEMP[6].xyxx 19: MAD TEMP[6].xy, TEMP[4].zzzz, CONST[10], TEMP[5] 20: MOV TEMP[5].xy, TEMP[6].xyxx 21: ADD TEMP[6].xy, TEMP[5], CONST[11] 22: MOV TEMP[5].xy, TEMP[6].xyxx 23: MAD TEMP[6].xy, TEMP[5], IMM[0].xyzz, IMM[0].yyyy 24: MOV TEMP[6].xy, TEMP[6].xyxx 25: MUL TEMP[5], TEMP[4].yyyy, CONST[5] 26: MAD TEMP[5], TEMP[4].xxxx, CONST[4], TEMP[5] 27: MAD TEMP[5], TEMP[4].zzzz, CONST[6], TEMP[5] 28: MOV TEMP[4].xyz, TEMP[4].xyzx 29: ADD TEMP[1], TEMP[5], CONST[7] 30: MOV TEMP[5].xyz, TEMP[1].xywx 31: MOV TEMP[7].xy, IN[2].xyxx 32: MOV TEMP[6].zw, IMM[0].zzzz 33: MOV TEMP[0].zw, IMM[0].wwzw 34: MOV TEMP[2].zw, IMM[0].wwzw 35: MOV TEMP[3].zw, IMM[0].wwzw 36: MOV TEMP[7].zw, IMM[0].wwzw 37: MOV TEMP[4].w, IMM[0].wwww 38: MOV TEMP[5].w, IMM[0].wwww 39: MOV OUT[6], TEMP[5] 40: MOV OUT[7], TEMP[6] 41: MOV OUT[1], TEMP[0] 42: MOV OUT[0], TEMP[1] 43: MOV OUT[2], TEMP[2] 44: MOV OUT[3], TEMP[3] 45: MOV OUT[4], TEMP[7] 46: MOV OUT[5], TEMP[4] 47: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = fmul float %53, %68 %77 = fmul float %54, %69 %78 = fadd float %49, %68 %79 = fadd float %50, %69 %80 = fadd float %51, %68 %81 = fadd float %52, %69 %82 = fmul float %78, %53 %83 = fmul float %79, %54 %84 = fmul float %80, %53 %85 = fmul float %81, %54 %86 = fmul float %82, %55 %87 = fmul float %83, %55 %88 = fmul float %84, %56 %89 = fmul float %85, %56 %90 = fmul float %16, %62 %91 = fmul float %17, %62 %92 = fmul float %18, %62 %93 = fmul float %61, %13 %94 = fadd float %93, %90 %95 = fmul float %61, %14 %96 = fadd float %95, %91 %97 = fmul float %61, %15 %98 = fadd float %97, %92 %99 = fmul float %63, %19 %100 = fadd float %99, %94 %101 = fmul float %63, %20 %102 = fadd float %101, %96 %103 = fmul float %63, %21 %104 = fadd float %103, %98 %105 = fadd float %100, %22 %106 = fadd float %102, %23 %107 = fadd float %104, %24 %108 = fmul float %106, %43 %109 = fmul float %106, %44 %110 = fmul float %105, %41 %111 = fadd float %110, %108 %112 = fmul float %105, %42 %113 = fadd float %112, %109 %114 = fmul float %107, %45 %115 = fadd float %114, %111 %116 = fmul float %107, %46 %117 = fadd float %116, %113 %118 = fadd float %115, %47 %119 = fadd float %117, %48 %120 = fmul float %118, 5.000000e-01 %121 = fadd float %120, -5.000000e-01 %122 = fmul float %119, -5.000000e-01 %123 = fadd float %122, -5.000000e-01 %124 = fmul float %106, %29 %125 = fmul float %106, %30 %126 = fmul float %106, %31 %127 = fmul float %106, %32 %128 = fmul float %105, %25 %129 = fadd float %128, %124 %130 = fmul float %105, %26 %131 = fadd float %130, %125 %132 = fmul float %105, %27 %133 = fadd float %132, %126 %134 = fmul float %105, %28 %135 = fadd float %134, %127 %136 = fmul float %107, %33 %137 = fadd float %136, %129 %138 = fmul float %107, %34 %139 = fadd float %138, %131 %140 = fmul float %107, %35 %141 = fadd float %140, %133 %142 = fmul float %107, %36 %143 = fadd float %142, %135 %144 = fadd float %137, %37 %145 = fadd float %139, %38 %146 = fadd float %141, %39 %147 = fadd float %143, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %77, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %87, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %88, float %89, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %75, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %105, float %106, float %107, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %144, float %145, float %147, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %121, float %123, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %144, float %145, float %146, float %147) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x35 ; C2020135 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v2 ; 100A0404 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x34 ; C2028134 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s5, v1 ; 100C0205 V_MOV_B32_e32 v7, 1.000000e+00 ; 7E0E02F2 V_MOV_B32_e32 v8, 0.000000e+00 ; 7E100280 EXP 15, 32, 0, 0, 0, v6, v5, v8, v7 ; F800020F 07080506 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x31 ; C2030131 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_ADD_F32_e32 v5, s6, v2 ; 060A0406 V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x36 ; C2030136 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s6, v5 ; 100A0A06 S_BUFFER_LOAD_DWORD s7, s[0:3], 0x30 ; C2038130 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s7, v1 ; 060C0207 V_MUL_F32_e32 v6, s5, v6 ; 100C0C05 V_MUL_F32_e32 v6, s6, v6 ; 100C0C06 EXP 15, 33, 0, 0, 0, v6, v5, v8, v7 ; F800021F 07080506 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x33 ; C2030133 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_ADD_F32_e32 v5, s6, v2 ; 060A0406 V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x37 ; C2020137 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x32 ; C2030132 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s6, v1 ; 06020206 V_MUL_F32_e32 v1, s5, v1 ; 10020205 V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 34, 0, 0, 0, v1, v5, v8, v7 ; F800022F 07080501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 35, 0, 0, 0, v1, v2, v8, v7 ; F800023F 07080201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MUL_F32_e32 v4, s4, v1 ; 10080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v2, v5, v4, 0, 0 ; D2820004 04120B02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v1 ; 100A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v0, v6, v5, 0, 0 ; D2820005 04160D00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v1 ; 100C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v6, v0, v9, v6, 0, 0 ; D2820006 041A1300 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s4 ; 7E120204 V_MAD_F32 v0, v2, v9, v6, 0, 0 ; D2820000 041A1302 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 EXP 15, 36, 0, 0, 0, v0, v5, v4, v7 ; F800024F 07040500 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v1, s4, v5 ; 10020A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v1, 0, 0 ; D2820001 04040900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v5 ; 10040A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v4, s4, v2, 0, 0 ; D2820002 04080904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v5 ; 10060A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 EXP 15, 37, 0, 0, 0, v3, v2, v1, v7 ; F800025F 07010203 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x25 ; C2020125 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v6, s4, v5 ; 100C0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v0, s4, v6, 0, 0 ; D2820006 04180900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x29 ; C2020129 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v4, s4, v6, 0, 0 ; D2820006 04180904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2d ; C202012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v6 ; 060C0C04 V_MAD_F32 v6, v6, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820006 03C5E306 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x24 ; C2020124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v5 ; 100E0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x20 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v0, s4, v7, 0, 0 ; D2820007 041C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x28 ; C2020128 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v4, s4, v7, 0, 0 ; D2820007 041C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2c ; C202012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v7 ; 060E0E04 V_MAD_F32 v7, v7, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820007 03C5E107 EXP 15, 38, 0, 0, 0, v7, v6, v8, v8 ; F800026F 08080607 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v5, 0, 0 ; D2820000 04140900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1e ; C200011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v3, v2, v0, v1 ; F80008CF 01000203 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL CONST[0..14] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.2000} IMM[1] FLT32 { 0.3330, 2.0000, -2.0000, 0.0000} IMM[2] FLT32 { 0.5000, -0.0000, -16.0000, -1.4427} IMM[3] FLT32 { 16.0000, 0.0050, 0.5000, -0.5000} IMM[4] FLT32 { 6.0000, 2.0000, -2.0000, 0.2500} IMM[5] FLT32 { 0.8000, 0.4545, 0.4000, 1.4427} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[9], 2D 3: MOV TEMP[1].z, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].zzzz 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].zzzz 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].zzzz 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 34: UIF TEMP[4].xxxx :0 35: KILL 36: ENDIF 37: MOV TEMP[3].xy, IN[3].xyyy 38: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 39: POW TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww 40: POW TEMP[4].y, TEMP[3].yyyy, IMM[0].wwww 41: POW TEMP[4].z, TEMP[3].zzzz, IMM[0].wwww 42: MOV TEMP[2].xyz, TEMP[4].xyzx 43: RCP TEMP[3].x, CONST[0].xxxx 44: MUL TEMP[3].y, TEMP[3].xxxx, IMM[1].xxxx 45: MUL TEMP[5].w, TEMP[3].yyyy, TEMP[3].yyyy 46: MOV TEMP[6].xy, IN[0].xyyy 47: TEX TEMP[6], TEMP[6], SAMP[0], 2D 48: MOV TEMP[7].w, TEMP[6].wwzw 49: MOV TEMP[8].xy, IN[0].xyyy 50: TEX TEMP[8].x, TEMP[8], SAMP[1], 2D 51: MAD TEMP[9].xy, TEMP[6], IMM[1].yyyy, IMM[0].yyyy 52: MOV TEMP[7].xy, TEMP[9].xyxx 53: MUL TEMP[10].yz, TEMP[3].yyyy, TEMP[7].xxyw 54: MOV TEMP[9].z, TEMP[10].zyzz 55: MAD TEMP[8].z, TEMP[8].xxxx, IMM[1].yyyy, IMM[0].yyyy 56: MOV TEMP[8].z, TEMP[8].zzzz 57: MOV TEMP[8].xy, TEMP[6].zwzz 58: MOV TEMP[6].xy, IN[1].xyyy 59: TEX TEMP[6], TEMP[6], SAMP[2], 2D 60: MOV TEMP[11].w, TEMP[6].wwww 61: MOV TEMP[12].xy, IN[1].xyyy 62: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 63: MOV TEMP[13].w, TEMP[12].wwww 64: ADD TEMP[14].zw, TEMP[6].xyxy, TEMP[6].xyxy 65: MOV TEMP[7].zw, TEMP[14].wwzw 66: MAD TEMP[14].zw, TEMP[7], IMM[0].yxyx, IMM[0].yxxy 67: MOV TEMP[7].zw, TEMP[14].wwzw 68: MUL TEMP[14].zw, TEMP[3].yyyy, TEMP[7] 69: MOV TEMP[7].zw, TEMP[14].wwzw 70: MAD TEMP[15].xy, TEMP[7], TEMP[3].yyyy, TEMP[14].zwzw 71: MOV TEMP[7].xy, TEMP[15].xyxx 72: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].zzzz, IMM[0].xxxx 73: MOV TEMP[13].z, TEMP[12].zzzz 74: MOV TEMP[13].xy, TEMP[6].zwzz 75: MUL TEMP[6].xyz, TEMP[5].wwww, TEMP[13] 76: MOV TEMP[11].xyz, TEMP[6].xyzx 77: MAD TEMP[6].xyz, TEMP[8], TEMP[5].wwww, TEMP[11] 78: MOV TEMP[8].xyz, TEMP[6].xyzx 79: ADD TEMP[6].xw, TEMP[10].yyzz, TEMP[10].yyzz 80: MOV TEMP[9].xw, TEMP[6].xxxw 81: MUL TEMP[6].xy, TEMP[14].zwzw, TEMP[9].xwzw 82: MOV TEMP[11].xy, TEMP[6].xyxx 83: DP2 TEMP[6].x, TEMP[10].yzzz, TEMP[14].wzzz 84: MOV TEMP[11].z, TEMP[6].xxxx 85: ADD TEMP[6].xyz, TEMP[8], TEMP[11] 86: MOV TEMP[8].xyz, TEMP[6].xyzx 87: MOV TEMP[6].xy, IN[2].xyyy 88: TEX TEMP[6], TEMP[6], SAMP[2], 2D 89: MOV TEMP[11].zw, TEMP[6].wwzw 90: MOV TEMP[12].xy, IN[2].xyyy 91: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 92: MOV TEMP[13].xw, TEMP[12].xxxw 93: MAD TEMP[15].xy, TEMP[6].yxzw, IMM[1].yyyy, IMM[0].yyyy 94: MOV TEMP[11].xy, TEMP[15].xyxx 95: MUL TEMP[15].yz, TEMP[3].yyyy, TEMP[11].xxyw 96: MOV TEMP[13].yz, TEMP[15].zyzz 97: MAD TEMP[3].xy, TEMP[11].yxzw, TEMP[3].yyyy, TEMP[7] 98: MOV TEMP[3].xy, TEMP[3].xyxx 99: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].yyyy, IMM[0].yyyy 100: MOV TEMP[12].z, TEMP[12].zzzz 101: MOV TEMP[12].xy, TEMP[6].zwzz 102: MAD TEMP[5].xyz, TEMP[12], TEMP[5].wwww, TEMP[8] 103: MOV TEMP[8].xyz, TEMP[5].xyzx 104: MUL TEMP[5].xy, TEMP[9].xwzw, TEMP[13].zyzw 105: MOV TEMP[11].xy, TEMP[5].xyxx 106: DP2 TEMP[5].x, TEMP[10].yzzz, TEMP[15].yzzz 107: MOV TEMP[11].z, TEMP[5].xxxx 108: ADD TEMP[5].xyz, TEMP[8], TEMP[11] 109: MOV TEMP[9].xyz, TEMP[5].xyzx 110: MUL TEMP[5].yw, TEMP[7].xzzw, TEMP[13].xzzy 111: MOV TEMP[1].yw, TEMP[5].wyww 112: ADD TEMP[5].xy, TEMP[1].ywzw, TEMP[1].ywzw 113: MOV TEMP[8].xy, TEMP[5].xyxx 114: DP2 TEMP[5].x, TEMP[14].zwww, TEMP[15].yzzz 115: MOV TEMP[8].z, TEMP[5].xxxx 116: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 117: MOV TEMP[7].xyz, TEMP[5].xyzx 118: MAD TEMP[5].xyz, TEMP[3].xyxw, -TEMP[3].xyyw, TEMP[7] 119: MOV TEMP[7].xy, TEMP[5].xyzx 120: MOV TEMP[3].z, IMM[0].xxxx 121: DP3 TEMP[6].x, CONST[1].xyzz, TEMP[3].xyzz 122: MOV_SAT TEMP[6].x, TEMP[6].xxxx 123: ADD TEMP[10].xyz, CONST[8], -IN[4] 124: MOV TEMP[9].xyz, TEMP[10].xyzx 125: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 126: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 127: RSQ TEMP[10].x, TEMP[10].xxxx 128: MUL TEMP[14].xyz, TEMP[10].xxxx, TEMP[9] 129: MAD TEMP[15].xyz, TEMP[9], TEMP[10].xxxx, -CONST[5] 130: MOV TEMP[11].xyz, TEMP[15].xyzx 131: MAD TEMP[15].xyz, CONST[5].wwww, TEMP[11], CONST[5] 132: MOV TEMP[11].xyz, TEMP[15].xyzx 133: DP3 TEMP[15].x, TEMP[15].xyzz, TEMP[15].xyzz 134: MAX TEMP[15].x, TEMP[15].xxxx, IMM[1].wwww 135: RSQ TEMP[15].x, TEMP[15].xxxx 136: MAD TEMP[15].xyz, TEMP[11], TEMP[15].xxxx, CONST[1] 137: MOV TEMP[11].xyz, TEMP[15].xyzx 138: RCP TEMP[15].x, TEMP[15].zzzz 139: MAD TEMP[15].xy, TEMP[11], TEMP[15].xxxx, -TEMP[3] 140: RCP TEMP[16].x, CONST[3].wwww 141: ADD TEMP[17].zw, TEMP[16].xxxx, TEMP[5].xyxy 142: MUL TEMP[18].w, TEMP[5].zzzz, TEMP[5].zzzz 143: MOV TEMP[7].w, TEMP[18].wwww 144: MAD TEMP[19].w, TEMP[17].zzzz, TEMP[17].wwww, -TEMP[18].wwww 145: MUL TEMP[20].w, TEMP[15].xxxx, TEMP[15].xxxx 146: ADD TEMP[5].z, TEMP[5].zzzz, TEMP[5].zzzz 147: MOV TEMP[7].z, TEMP[5].zzzz 148: MUL TEMP[21].x, TEMP[15].xxxx, TEMP[5].zzzz 149: MAD TEMP[22].z, TEMP[17].zzzz, TEMP[15].yyyy, -TEMP[21].xxxx 150: MUL TEMP[22].z, TEMP[15].yyyy, TEMP[22].zzzz 151: MAD TEMP[17].z, TEMP[20].wwww, TEMP[17].wwww, TEMP[22].zzzz 152: MUL TEMP[17].z, TEMP[17].zzzz, IMM[2].xxxx 153: RCP TEMP[22].x, TEMP[19].wwww 154: MUL TEMP[23].x, TEMP[22].xxxx, TEMP[17].zzzz 155: MOV TEMP[24].x, -TEMP[19].wwww 156: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 157: UIF TEMP[24].xxxx :0 158: MOV TEMP[24].x, IMM[0].xxxx 159: ELSE :0 160: MOV TEMP[24].x, IMM[2].yyyy 161: ENDIF 162: MAD TEMP[17].z, TEMP[17].zzzz, TEMP[22].xxxx, IMM[2].zzzz 163: FSGE TEMP[17].x, TEMP[17].zzzz, IMM[0].zzzz 164: UIF TEMP[17].xxxx :0 165: MOV TEMP[17].x, IMM[0].xxxx 166: ELSE :0 167: MOV TEMP[17].x, IMM[2].yyyy 168: ENDIF 169: ADD TEMP[17].z, TEMP[17].xxxx, TEMP[24].xxxx 170: MUL TEMP[22].w, TEMP[23].xxxx, IMM[2].wwww 171: EX2 TEMP[22].x, TEMP[22].wwww 172: MAX TEMP[19].x, TEMP[19].wwww, IMM[1].wwww 173: RSQ TEMP[19].x, TEMP[19].xxxx 174: MUL TEMP[19].w, TEMP[19].xxxx, TEMP[22].xxxx 175: MOV TEMP[9].w, TEMP[19].wwww 176: MAD TEMP[16].xy, TEMP[16].xxxx, IMM[3].xxxx, TEMP[7] 177: MAD TEMP[22].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 178: MAD TEMP[21].x, TEMP[16].xxxx, TEMP[15].yyyy, -TEMP[21].xxxx 179: MUL TEMP[15].x, TEMP[15].yyyy, TEMP[21].xxxx 180: MAD TEMP[15].w, TEMP[20].wwww, TEMP[16].yyyy, TEMP[15].xxxx 181: MUL TEMP[15].w, TEMP[15].wwww, IMM[2].xxxx 182: RCP TEMP[11].x, TEMP[22].wwww 183: MUL TEMP[16].y, TEMP[15].wwww, TEMP[11].xxxx 184: MOV TEMP[20].x, -TEMP[22].wwww 185: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 186: UIF TEMP[20].xxxx :0 187: MOV TEMP[20].x, IMM[0].xxxx 188: ELSE :0 189: MOV TEMP[20].x, IMM[2].yyyy 190: ENDIF 191: MAD TEMP[11].w, TEMP[15].wwww, TEMP[11].xxxx, IMM[2].zzzz 192: FSGE TEMP[11].x, TEMP[11].wwww, IMM[0].zzzz 193: UIF TEMP[11].xxxx :0 194: MOV TEMP[11].x, IMM[0].xxxx 195: ELSE :0 196: MOV TEMP[11].x, IMM[2].yyyy 197: ENDIF 198: ADD TEMP[11].w, TEMP[11].xxxx, TEMP[20].xxxx 199: MOV TEMP[8].w, TEMP[11].wwww 200: MUL TEMP[15].x, TEMP[16].yyyy, IMM[2].wwww 201: MAX TEMP[16].x, TEMP[22].wwww, IMM[1].wwww 202: RSQ TEMP[16].x, TEMP[16].xxxx 203: EX2 TEMP[15].x, TEMP[15].xxxx 204: MUL TEMP[15].w, TEMP[16].xxxx, TEMP[15].xxxx 205: MUL TEMP[15].w, TEMP[15].wwww, IMM[3].yyyy 206: MOV TEMP[2].w, TEMP[15].wwww 207: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, -CONST[6] 208: MOV TEMP[9].xyz, TEMP[10].xyzx 209: MAD TEMP[10].xyz, CONST[6].wwww, TEMP[9], CONST[6] 210: MOV TEMP[9].xyz, TEMP[10].xyzx 211: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 212: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 213: RSQ TEMP[10].x, TEMP[10].xxxx 214: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, CONST[2] 215: MOV TEMP[9].xyz, TEMP[10].xyzx 216: RCP TEMP[10].x, TEMP[10].zzzz 217: MAD TEMP[10].xy, TEMP[9], TEMP[10].xxxx, -TEMP[3] 218: RCP TEMP[16].x, CONST[4].wwww 219: ADD TEMP[16].xy, TEMP[16].xxxx, TEMP[7] 220: MAD TEMP[18].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 221: MUL TEMP[20].w, TEMP[10].xxxx, TEMP[10].xxxx 222: MUL TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx 223: MAD TEMP[5].z, TEMP[16].xxxx, TEMP[10].yyyy, -TEMP[5].zzzz 224: MUL TEMP[5].z, TEMP[10].yyyy, TEMP[5].zzzz 225: MAD TEMP[5].z, TEMP[20].wwww, TEMP[16].yyyy, TEMP[5].zzzz 226: MUL TEMP[5].z, TEMP[5].zzzz, IMM[2].xxxx 227: RCP TEMP[10].x, TEMP[18].wwww 228: MUL TEMP[16].x, TEMP[10].xxxx, TEMP[5].zzzz 229: MOV TEMP[20].x, -TEMP[18].wwww 230: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 231: UIF TEMP[20].xxxx :0 232: MOV TEMP[20].x, IMM[0].xxxx 233: ELSE :0 234: MOV TEMP[20].x, IMM[2].yyyy 235: ENDIF 236: MAD TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx, IMM[2].zzzz 237: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 238: UIF TEMP[5].xxxx :0 239: MOV TEMP[5].x, IMM[0].xxxx 240: ELSE :0 241: MOV TEMP[5].x, IMM[2].yyyy 242: ENDIF 243: ADD TEMP[5].z, TEMP[5].xxxx, TEMP[20].xxxx 244: MOV TEMP[7].z, TEMP[5].zzzz 245: MUL TEMP[10].w, TEMP[16].xxxx, IMM[2].wwww 246: EX2 TEMP[10].x, TEMP[10].wwww 247: MAX TEMP[16].x, TEMP[18].wwww, IMM[1].wwww 248: RSQ TEMP[16].x, TEMP[16].xxxx 249: MUL TEMP[10].w, TEMP[10].xxxx, TEMP[16].xxxx 250: MOV TEMP[7].w, TEMP[10].wwww 251: MUL TEMP[16].xyz, CONST[10].xyww, IN[4].yyyy 252: MOV TEMP[9].xyz, TEMP[16].xyzx 253: MAD TEMP[16].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[9] 254: MOV TEMP[9].xyz, TEMP[16].xyzx 255: MAD TEMP[16].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[9] 256: MOV TEMP[9].xyz, TEMP[16].xyzx 257: ADD TEMP[16].xyz, TEMP[9], CONST[12].xyww 258: MOV TEMP[9].xy, TEMP[16].xyzx 259: RCP TEMP[16].x, TEMP[16].zzzz 260: MOV TEMP[9].z, TEMP[16].xxxx 261: MUL TEMP[16].xy, TEMP[16].xxxx, TEMP[9] 262: MOV TEMP[9].xy, TEMP[16].xyxx 263: MAD TEMP[16].xy, TEMP[9], IMM[3].zwzw, IMM[3].wwww 264: MOV TEMP[9].xy, TEMP[16].xyxx 265: MAD TEMP[16].xy, TEMP[7], IMM[4].xxxx, TEMP[9] 266: MOV TEMP[7].xy, TEMP[16].xyxx 267: MAD TEMP[16].xy, TEMP[18].wwww, IMM[1].yyyy, TEMP[7] 268: MOV TEMP[16].xy, TEMP[16].xyyy 269: TEX TEMP[16], TEMP[16], SAMP[4], 2D 270: MAD TEMP[20].xy, TEMP[18].wwww, IMM[4].yzzw, TEMP[7] 271: MOV TEMP[20].xy, TEMP[20].xyyy 272: TEX TEMP[20], TEMP[20], SAMP[4], 2D 273: MUL TEMP[12], TEMP[20], IMM[4].wwww 274: MAD TEMP[13], TEMP[16], IMM[4].wwww, TEMP[12] 275: MAD TEMP[12].xy, TEMP[18].wwww, IMM[4].zxzw, TEMP[7] 276: MOV TEMP[12].xy, TEMP[12].xyyy 277: TEX TEMP[12], TEMP[12], SAMP[4], 2D 278: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 279: MAD TEMP[12].xy, TEMP[18].wwww, IMM[1].zzzz, TEMP[7] 280: MOV TEMP[12].xy, TEMP[12].xyyy 281: TEX TEMP[12], TEMP[12], SAMP[4], 2D 282: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 283: ABS TEMP[12].x, TEMP[13].xxxx 284: LG2 TEMP[9].x, TEMP[12].xxxx 285: ABS TEMP[12].x, TEMP[13].yyyy 286: LG2 TEMP[12].x, TEMP[12].xxxx 287: MOV TEMP[9].y, TEMP[12].xxxx 288: ABS TEMP[12].x, TEMP[13].zzzz 289: LG2 TEMP[12].x, TEMP[12].xxxx 290: MOV TEMP[9].z, TEMP[12].xxxx 291: MUL TEMP[12].xyz, TEMP[9], IMM[0].wwww 292: EX2 TEMP[13].x, TEMP[12].xxxx 293: EX2 TEMP[16].x, TEMP[12].yyyy 294: MOV TEMP[13].y, TEMP[16].xxxx 295: EX2 TEMP[12].x, TEMP[12].zzzz 296: MOV TEMP[13].z, TEMP[12].xxxx 297: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[14].xyzz 298: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].xxxx 299: MUL TEMP[12].x, TEMP[3].wwww, TEMP[3].wwww 300: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 301: MUL TEMP[14].y, TEMP[3].wwww, TEMP[12].xxxx 302: MUL TEMP[16].xyz, TEMP[19].wwww, CONST[3] 303: MOV TEMP[17], -TEMP[17].zzzz 304: FSGE TEMP[18].x, TEMP[17].xxxx, IMM[0].zzzz 305: UIF TEMP[18].xxxx :0 306: MOV TEMP[18].x, TEMP[16].xxxx 307: ELSE :0 308: MOV TEMP[18].x, IMM[2].yyyy 309: ENDIF 310: MOV TEMP[18].x, TEMP[18].xxxx 311: FSGE TEMP[19].x, TEMP[17].yyyy, IMM[0].zzzz 312: UIF TEMP[19].xxxx :0 313: MOV TEMP[19].x, TEMP[16].yyyy 314: ELSE :0 315: MOV TEMP[19].x, IMM[2].yyyy 316: ENDIF 317: MOV TEMP[18].y, TEMP[19].xxxx 318: FSGE TEMP[19].x, TEMP[17].zzzz, IMM[0].zzzz 319: UIF TEMP[19].xxxx :0 320: MOV TEMP[16].x, TEMP[16].zzzz 321: ELSE :0 322: MOV TEMP[16].x, IMM[2].yyyy 323: ENDIF 324: MOV TEMP[18].z, TEMP[16].xxxx 325: FSGE TEMP[16].x, TEMP[17].wwww, IMM[0].zzzz 326: UIF TEMP[16].xxxx :0 327: ELSE :0 328: ENDIF 329: MOV TEMP[9].xyz, TEMP[18].xyzx 330: MUL TEMP[10].xyz, TEMP[10].wwww, CONST[4] 331: MOV TEMP[8].xyz, TEMP[10].xyzx 332: MUL TEMP[10].xyz, TEMP[8], IMM[5].xxxx 333: MOV TEMP[5], -TEMP[5].zzzz 334: FSGE TEMP[16].x, TEMP[5].xxxx, IMM[0].zzzz 335: UIF TEMP[16].xxxx :0 336: MOV TEMP[16].x, TEMP[10].xxxx 337: ELSE :0 338: MOV TEMP[16].x, IMM[2].yyyy 339: ENDIF 340: MOV TEMP[16].x, TEMP[16].xxxx 341: FSGE TEMP[17].x, TEMP[5].yyyy, IMM[0].zzzz 342: UIF TEMP[17].xxxx :0 343: MOV TEMP[17].x, TEMP[10].yyyy 344: ELSE :0 345: MOV TEMP[17].x, IMM[2].yyyy 346: ENDIF 347: MOV TEMP[16].y, TEMP[17].xxxx 348: FSGE TEMP[17].x, TEMP[5].zzzz, IMM[0].zzzz 349: UIF TEMP[17].xxxx :0 350: MOV TEMP[10].x, TEMP[10].zzzz 351: ELSE :0 352: MOV TEMP[10].x, IMM[2].yyyy 353: ENDIF 354: MOV TEMP[16].z, TEMP[10].xxxx 355: FSGE TEMP[5].x, TEMP[5].wwww, IMM[0].zzzz 356: UIF TEMP[5].xxxx :0 357: ELSE :0 358: ENDIF 359: MOV TEMP[8].xyz, TEMP[16].xyzx 360: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 361: MAD TEMP[4].yzw, TEMP[5].xxyz, TEMP[14].yyyy, TEMP[4].xxyz 362: MOV TEMP[7].w, TEMP[4].zyzw 363: MUL TEMP[5].xyz, TEMP[2], CONST[3] 364: MOV TEMP[2].xyz, TEMP[5].xyzx 365: MUL TEMP[5].xyz, TEMP[15].wwww, TEMP[2] 366: MOV TEMP[8], -TEMP[11].wwww 367: FSGE TEMP[10].x, TEMP[8].xxxx, IMM[0].zzzz 368: UIF TEMP[10].xxxx :0 369: MOV TEMP[10].x, TEMP[5].xxxx 370: ELSE :0 371: MOV TEMP[10].x, IMM[2].yyyy 372: ENDIF 373: MOV TEMP[10].x, TEMP[10].xxxx 374: FSGE TEMP[11].x, TEMP[8].yyyy, IMM[0].zzzz 375: UIF TEMP[11].xxxx :0 376: MOV TEMP[11].x, TEMP[5].yyyy 377: ELSE :0 378: MOV TEMP[11].x, IMM[2].yyyy 379: ENDIF 380: MOV TEMP[10].y, TEMP[11].xxxx 381: FSGE TEMP[11].x, TEMP[8].zzzz, IMM[0].zzzz 382: UIF TEMP[11].xxxx :0 383: MOV TEMP[5].x, TEMP[5].zzzz 384: ELSE :0 385: MOV TEMP[5].x, IMM[2].yyyy 386: ENDIF 387: MOV TEMP[10].z, TEMP[5].xxxx 388: FSGE TEMP[5].x, TEMP[8].wwww, IMM[0].zzzz 389: UIF TEMP[5].xxxx :0 390: ELSE :0 391: ENDIF 392: MOV TEMP[2].xyz, TEMP[10].xyzx 393: MAD TEMP[4].xyz, TEMP[4].yzww, TEMP[6].xxxx, TEMP[2] 394: MOV TEMP[2].xyz, TEMP[4].xyzx 395: MAD TEMP[4].xyz, TEMP[13], CONST[14].xxxx, TEMP[2] 396: MOV TEMP[2].xyz, TEMP[4].xyzx 397: MOV TEMP[4].xy, IN[3].xyyy 398: TEX TEMP[4].w, TEMP[4], SAMP[5], 2D 399: MOV TEMP[9].w, TEMP[4].wwww 400: MAD TEMP[3].y, TEMP[3].wwww, TEMP[12].xxxx, IMM[2].xxxx 401: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 402: ADD TEMP[5], TEMP[13].wwww, IMM[0].xxxx 403: MOV_SAT TEMP[5], TEMP[5] 404: MUL TEMP[4].w, TEMP[4].wwww, TEMP[5].yyyy 405: MOV TEMP[4].w, TEMP[4].wwww 406: ABS TEMP[5].x, TEMP[3].xxxx 407: LG2 TEMP[7].x, TEMP[5].xxxx 408: ABS TEMP[5].x, TEMP[3].yyyy 409: LG2 TEMP[5].x, TEMP[5].xxxx 410: MOV TEMP[7].y, TEMP[5].xxxx 411: ABS TEMP[3].x, TEMP[3].zzzz 412: LG2 TEMP[3].x, TEMP[3].xxxx 413: MOV TEMP[7].z, TEMP[3].xxxx 414: MUL TEMP[3].xyz, TEMP[7], IMM[5].yyyy 415: EX2 TEMP[7].x, TEMP[3].xxxx 416: EX2 TEMP[5].x, TEMP[3].yyyy 417: MOV TEMP[7].y, TEMP[5].xxxx 418: EX2 TEMP[3].x, TEMP[3].zzzz 419: MOV TEMP[7].z, TEMP[3].xxxx 420: MOV TEMP[3].xyz, TEMP[7].xyzz 421: TEX TEMP[3], TEMP[3], SAMP[11], 3D 422: MOV TEMP[2].w, TEMP[3].wwww 423: MAD TEMP[5].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 424: MOV TEMP[5].xy, TEMP[5].xyyy 425: TEX TEMP[5], TEMP[5], SAMP[8], 2D 426: MOV TEMP[7].w, TEMP[5].wwww 427: ADD TEMP[6].y, -TEMP[5].wwww, IMM[0].xxxx 428: MAD TEMP[3].xyz, TEMP[3], TEMP[6].yyyy, TEMP[5] 429: MOV TEMP[2].xyz, TEMP[3].xyzx 430: LRP TEMP[2].xyz, TEMP[0].xxxx, TEMP[2], IMM[5].zzzz 431: MOV TEMP[7].xyz, TEMP[2].xyzx 432: MOV TEMP[2].xyz, TEMP[2].xyzz 433: TEX TEMP[2], TEMP[2], SAMP[10], 3D 434: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 435: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[2], TEMP[7] 436: MOV TEMP[9].xyz, TEMP[0].xyzx 437: RCP TEMP[1].x, IN[5].zzzz 438: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 439: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx, IMM[2].xxxx 440: MOV TEMP[1].x, TEMP[2].xxxx 441: MOV TEMP[1].y, CONST[7].wwww 442: MOV TEMP[2].xy, TEMP[1].xyyy 443: TEX TEMP[2].x, TEMP[2], SAMP[7], 2D 444: MOV TEMP[1].x, TEMP[2].xxxx 445: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 446: MOV TEMP[1].w, TEMP[3].zyzw 447: DP3 TEMP[5].x, TEMP[3].yzww, TEMP[3].yzww 448: MOV TEMP[1].y, TEMP[5].xxxx 449: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 450: MUL TEMP[5].z, TEMP[1].zzzz, IMM[5].wwww 451: EX2 TEMP[5].x, TEMP[5].zzzz 452: ADD TEMP[5].z, -TEMP[5].xxxx, IMM[0].xxxx 453: MUL TEMP[1].y, TEMP[5].zzzz, TEMP[1].yyyy 454: RCP TEMP[3].x, TEMP[3].wwww 455: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 456: MUL TEMP[1].y, TEMP[1].yyyy, IMM[5].wwww 457: EX2 TEMP[1].x, TEMP[1].yyyy 458: MOV_SAT TEMP[1].x, TEMP[1].xxxx 459: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 460: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 461: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[13].xxyz 462: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[9] 463: MOV TEMP[4].xyz, TEMP[0].xyzx 464: MOV OUT[0], TEMP[4] 465: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %70 = load <8 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %72 = load <4 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %74 = load <8 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %76 = load <4 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %78 = load <8 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %80 = load <4 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %82 = load <8 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %84 = load <4 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %86 = load <8 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %88 = load <4 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %90 = load <8 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %92 = load <4 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %94 = load <8 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %96 = load <4 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %98 = load <8 x i32> addrspace(2)* %97, !tbaa !0 %99 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %100 = load <4 x i32> addrspace(2)* %99, !tbaa !0 %101 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %102 = load <8 x i32> addrspace(2)* %101, !tbaa !0 %103 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %104 = load <4 x i32> addrspace(2)* %103, !tbaa !0 %105 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %106 = load <8 x i32> addrspace(2)* %105, !tbaa !0 %107 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %108 = load <4 x i32> addrspace(2)* %107, !tbaa !0 %109 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %110 = load <8 x i32> addrspace(2)* %109, !tbaa !0 %111 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %112 = load <4 x i32> addrspace(2)* %111, !tbaa !0 %113 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %114 = load <8 x i32> addrspace(2)* %113, !tbaa !0 %115 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %116 = load <4 x i32> addrspace(2)* %115, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %132 = fmul float %130, 1.000000e+00 %133 = fadd float %132, 0.000000e+00 %134 = fmul float %131, -1.000000e+00 %135 = fadd float %134, 1.000000e+00 %136 = bitcast float %133 to i32 %137 = bitcast float %135 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %106 to <32 x i8> %141 = bitcast <4 x i32> %108 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 2 %145 = call float @fabs(float %143) %146 = fsub float -0.000000e+00, %145 %147 = fsub float -0.000000e+00, %145 %148 = fsub float -0.000000e+00, %145 %149 = fsub float -0.000000e+00, %145 %150 = fcmp oge float %146, 0.000000e+00 %151 = sext i1 %150 to i32 %152 = bitcast i32 %151 to float %153 = bitcast float %152 to i32 %154 = icmp ne i32 %153, 0 %. = select i1 %154, float -1.000000e+00, float 0.000000e+00 %155 = fcmp oge float %147, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %156 to float %158 = bitcast float %157 to i32 %159 = icmp ne i32 %158, 0 %temp16.0 = select i1 %159, float -1.000000e+00, float 0.000000e+00 %160 = fcmp oge float %148, 0.000000e+00 %161 = sext i1 %160 to i32 %162 = bitcast i32 %161 to float %163 = bitcast float %162 to i32 %164 = icmp ne i32 %163, 0 %.166 = select i1 %164, float -1.000000e+00, float 0.000000e+00 %165 = fcmp oge float %149, 0.000000e+00 %166 = sext i1 %165 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 %170 = fcmp olt float %., 0.000000e+00 %171 = sext i1 %170 to i32 %172 = fcmp olt float %temp16.0, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = fcmp olt float %.166, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %171 to float %177 = bitcast i32 %173 to float %178 = bitcast i32 %175 to float %179 = bitcast float %176 to i32 %180 = bitcast float %178 to i32 %181 = or i32 %179, %180 %182 = bitcast i32 %181 to float %183 = bitcast float %182 to i32 %184 = bitcast float %177 to i32 %185 = or i32 %183, %184 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = icmp ne i32 %187, 0 br i1 %188, label %IF110, label %ENDIF109 IF110: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF109 ENDIF109: ; preds = %main_body, %IF110 %189 = bitcast float %123 to i32 %190 = bitcast float %124 to i32 %191 = insertelement <2 x i32> undef, i32 %189, i32 0 %192 = insertelement <2 x i32> %191, i32 %190, i32 1 %193 = bitcast <8 x i32> %94 to <32 x i8> %194 = bitcast <4 x i32> %96 to <16 x i8> %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %192, <32 x i8> %193, <16 x i8> %194, i32 2) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = call float @llvm.pow.f32(float %196, float 0x40019999A0000000) %200 = call float @llvm.pow.f32(float %197, float 0x40019999A0000000) %201 = call float @llvm.pow.f32(float %198, float 0x40019999A0000000) %202 = fdiv float 1.000000e+00, %24 %203 = fmul float %202, 0x3FD54FDF40000000 %204 = fmul float %203, %203 %205 = bitcast float %117 to i32 %206 = bitcast float %118 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = bitcast <8 x i32> %70 to <32 x i8> %210 = bitcast <4 x i32> %72 to <16 x i8> %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = extractelement <4 x float> %211, i32 3 %216 = bitcast float %117 to i32 %217 = bitcast float %118 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = bitcast <8 x i32> %74 to <32 x i8> %221 = bitcast <4 x i32> %76 to <16 x i8> %222 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %219, <32 x i8> %220, <16 x i8> %221, i32 2) %223 = extractelement <4 x float> %222, i32 0 %224 = fmul float %212, 2.000000e+00 %225 = fadd float %224, -1.000000e+00 %226 = fmul float %213, 2.000000e+00 %227 = fadd float %226, -1.000000e+00 %228 = fmul float %203, %225 %229 = fmul float %203, %227 %230 = fmul float %223, 2.000000e+00 %231 = fadd float %230, -1.000000e+00 %232 = bitcast float %119 to i32 %233 = bitcast float %120 to i32 %234 = insertelement <2 x i32> undef, i32 %232, i32 0 %235 = insertelement <2 x i32> %234, i32 %233, i32 1 %236 = bitcast <8 x i32> %78 to <32 x i8> %237 = bitcast <4 x i32> %80 to <16 x i8> %238 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %235, <32 x i8> %236, <16 x i8> %237, i32 2) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = extractelement <4 x float> %238, i32 3 %243 = bitcast float %119 to i32 %244 = bitcast float %120 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = bitcast <8 x i32> %82 to <32 x i8> %248 = bitcast <4 x i32> %84 to <16 x i8> %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = fadd float %239, %239 %252 = fadd float %240, %240 %253 = fmul float %251, -1.000000e+00 %254 = fadd float %253, 1.000000e+00 %255 = fmul float %252, 1.000000e+00 %256 = fadd float %255, -1.000000e+00 %257 = fmul float %203, %254 %258 = fmul float %203, %256 %259 = fmul float %225, %203 %260 = fadd float %259, %257 %261 = fmul float %227, %203 %262 = fadd float %261, %258 %263 = fmul float %250, -2.000000e+00 %264 = fadd float %263, 1.000000e+00 %265 = fmul float %204, %241 %266 = fmul float %204, %242 %267 = fmul float %204, %264 %268 = fmul float %214, %204 %269 = fadd float %268, %265 %270 = fmul float %215, %204 %271 = fadd float %270, %266 %272 = fmul float %231, %204 %273 = fadd float %272, %267 %274 = fadd float %228, %228 %275 = fadd float %229, %229 %276 = fmul float %257, %274 %277 = fmul float %258, %275 %278 = fmul float %228, %258 %279 = fmul float %229, %257 %280 = fadd float %278, %279 %281 = fadd float %269, %276 %282 = fadd float %271, %277 %283 = fadd float %273, %280 %284 = bitcast float %121 to i32 %285 = bitcast float %122 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = bitcast <8 x i32> %78 to <32 x i8> %289 = bitcast <4 x i32> %80 to <16 x i8> %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %288, <16 x i8> %289, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = extractelement <4 x float> %290, i32 3 %295 = bitcast float %121 to i32 %296 = bitcast float %122 to i32 %297 = insertelement <2 x i32> undef, i32 %295, i32 0 %298 = insertelement <2 x i32> %297, i32 %296, i32 1 %299 = bitcast <8 x i32> %82 to <32 x i8> %300 = bitcast <4 x i32> %84 to <16 x i8> %301 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %298, <32 x i8> %299, <16 x i8> %300, i32 2) %302 = extractelement <4 x float> %301, i32 0 %303 = fmul float %292, 2.000000e+00 %304 = fadd float %303, -1.000000e+00 %305 = fmul float %291, 2.000000e+00 %306 = fadd float %305, -1.000000e+00 %307 = fmul float %203, %304 %308 = fmul float %203, %306 %309 = fmul float %306, %203 %310 = fadd float %309, %260 %311 = fmul float %304, %203 %312 = fadd float %311, %262 %313 = fmul float %302, 2.000000e+00 %314 = fadd float %313, -1.000000e+00 %315 = fmul float %293, %204 %316 = fadd float %315, %281 %317 = fmul float %294, %204 %318 = fadd float %317, %282 %319 = fmul float %314, %204 %320 = fadd float %319, %283 %321 = fmul float %274, %308 %322 = fmul float %275, %307 %323 = fmul float %228, %307 %324 = fmul float %229, %308 %325 = fadd float %323, %324 %326 = fadd float %316, %321 %327 = fadd float %318, %322 %328 = fadd float %320, %325 %329 = fmul float %257, %308 %330 = fmul float %258, %307 %331 = fadd float %329, %329 %332 = fadd float %330, %330 %333 = fmul float %257, %307 %334 = fmul float %258, %308 %335 = fadd float %333, %334 %336 = fadd float %326, %331 %337 = fadd float %327, %332 %338 = fadd float %328, %335 %339 = fsub float -0.000000e+00, %310 %340 = fmul float %310, %339 %341 = fadd float %340, %336 %342 = fsub float -0.000000e+00, %312 %343 = fmul float %312, %342 %344 = fadd float %343, %337 %345 = fsub float -0.000000e+00, %312 %346 = fmul float %310, %345 %347 = fadd float %346, %338 %348 = fmul float %25, %310 %349 = fmul float %26, %312 %350 = fadd float %349, %348 %351 = fmul float %27, 1.000000e+00 %352 = fadd float %350, %351 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fsub float -0.000000e+00, %125 %355 = fadd float %50, %354 %356 = fsub float -0.000000e+00, %126 %357 = fadd float %51, %356 %358 = fsub float -0.000000e+00, %127 %359 = fadd float %52, %358 %360 = fmul float %355, %355 %361 = fmul float %357, %357 %362 = fadd float %361, %360 %363 = fmul float %359, %359 %364 = fadd float %362, %363 %365 = fcmp uge float %364, 0x3E7AD7F2A0000000 %366 = select i1 %365, float %364, float 0x3E7AD7F2A0000000 %367 = call float @llvm.AMDGPU.rsq.clamped.f32(float %366) %368 = fmul float %367, %355 %369 = fmul float %367, %357 %370 = fmul float %367, %359 %371 = fsub float -0.000000e+00, %39 %372 = fmul float %355, %367 %373 = fadd float %372, %371 %374 = fsub float -0.000000e+00, %40 %375 = fmul float %357, %367 %376 = fadd float %375, %374 %377 = fsub float -0.000000e+00, %41 %378 = fmul float %359, %367 %379 = fadd float %378, %377 %380 = fmul float %42, %373 %381 = fadd float %380, %39 %382 = fmul float %42, %376 %383 = fadd float %382, %40 %384 = fmul float %42, %379 %385 = fadd float %384, %41 %386 = fmul float %381, %381 %387 = fmul float %383, %383 %388 = fadd float %387, %386 %389 = fmul float %385, %385 %390 = fadd float %388, %389 %391 = fcmp uge float %390, 0x3E7AD7F2A0000000 %392 = select i1 %391, float %390, float 0x3E7AD7F2A0000000 %393 = call float @llvm.AMDGPU.rsq.clamped.f32(float %392) %394 = fmul float %381, %393 %395 = fadd float %394, %25 %396 = fmul float %383, %393 %397 = fadd float %396, %26 %398 = fmul float %385, %393 %399 = fadd float %398, %27 %400 = fdiv float 1.000000e+00, %399 %401 = fsub float -0.000000e+00, %310 %402 = fmul float %395, %400 %403 = fadd float %402, %401 %404 = fsub float -0.000000e+00, %312 %405 = fmul float %397, %400 %406 = fadd float %405, %404 %407 = fdiv float 1.000000e+00, %34 %408 = fadd float %407, %341 %409 = fadd float %407, %344 %410 = fmul float %347, %347 %411 = fsub float -0.000000e+00, %410 %412 = fmul float %408, %409 %413 = fadd float %412, %411 %414 = fmul float %403, %403 %415 = fadd float %347, %347 %416 = fmul float %403, %415 %417 = fsub float -0.000000e+00, %416 %418 = fmul float %408, %406 %419 = fadd float %418, %417 %420 = fmul float %406, %419 %421 = fmul float %414, %409 %422 = fadd float %421, %420 %423 = fmul float %422, 5.000000e-01 %424 = fdiv float 1.000000e+00, %413 %425 = fmul float %424, %423 %426 = fsub float -0.000000e+00, %413 %427 = fcmp oge float %426, 0.000000e+00 %428 = sext i1 %427 to i32 %429 = bitcast i32 %428 to float %430 = bitcast float %429 to i32 %431 = icmp ne i32 %430, 0 %.167 = select i1 %431, float 1.000000e+00, float -0.000000e+00 %432 = fmul float %423, %424 %433 = fadd float %432, -1.600000e+01 %434 = fcmp oge float %433, 0.000000e+00 %435 = sext i1 %434 to i32 %436 = bitcast i32 %435 to float %437 = bitcast float %436 to i32 %438 = icmp ne i32 %437, 0 %temp68.0 = select i1 %438, float 1.000000e+00, float -0.000000e+00 %439 = fadd float %temp68.0, %.167 %440 = fmul float %425, 0xBFF7154CA0000000 %441 = call float @llvm.AMDIL.exp.(float %440) %442 = fcmp uge float %413, 0x3E7AD7F2A0000000 %443 = select i1 %442, float %413, float 0x3E7AD7F2A0000000 %444 = call float @llvm.AMDGPU.rsq.clamped.f32(float %443) %445 = fmul float %444, %441 %446 = fmul float %407, 1.600000e+01 %447 = fadd float %446, %341 %448 = fmul float %407, 1.600000e+01 %449 = fadd float %448, %344 %450 = fsub float -0.000000e+00, %410 %451 = fmul float %447, %449 %452 = fadd float %451, %450 %453 = fsub float -0.000000e+00, %416 %454 = fmul float %447, %406 %455 = fadd float %454, %453 %456 = fmul float %406, %455 %457 = fmul float %414, %449 %458 = fadd float %457, %456 %459 = fmul float %458, 5.000000e-01 %460 = fdiv float 1.000000e+00, %452 %461 = fmul float %459, %460 %462 = fsub float -0.000000e+00, %452 %463 = fcmp oge float %462, 0.000000e+00 %464 = sext i1 %463 to i32 %465 = bitcast i32 %464 to float %466 = bitcast float %465 to i32 %467 = icmp ne i32 %466, 0 %.168 = select i1 %467, float 1.000000e+00, float -0.000000e+00 %468 = fmul float %459, %460 %469 = fadd float %468, -1.600000e+01 %470 = fcmp oge float %469, 0.000000e+00 %471 = sext i1 %470 to i32 %472 = bitcast i32 %471 to float %473 = bitcast float %472 to i32 %474 = icmp ne i32 %473, 0 %temp44.0 = select i1 %474, float 1.000000e+00, float -0.000000e+00 %475 = fadd float %temp44.0, %.168 %476 = fmul float %461, 0xBFF7154CA0000000 %477 = fcmp uge float %452, 0x3E7AD7F2A0000000 %478 = select i1 %477, float %452, float 0x3E7AD7F2A0000000 %479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %478) %480 = call float @llvm.AMDIL.exp.(float %476) %481 = fmul float %479, %480 %482 = fmul float %481, 0x3F747AE140000000 %483 = fsub float -0.000000e+00, %43 %484 = fmul float %355, %367 %485 = fadd float %484, %483 %486 = fsub float -0.000000e+00, %44 %487 = fmul float %357, %367 %488 = fadd float %487, %486 %489 = fsub float -0.000000e+00, %45 %490 = fmul float %359, %367 %491 = fadd float %490, %489 %492 = fmul float %46, %485 %493 = fadd float %492, %43 %494 = fmul float %46, %488 %495 = fadd float %494, %44 %496 = fmul float %46, %491 %497 = fadd float %496, %45 %498 = fmul float %493, %493 %499 = fmul float %495, %495 %500 = fadd float %499, %498 %501 = fmul float %497, %497 %502 = fadd float %500, %501 %503 = fcmp uge float %502, 0x3E7AD7F2A0000000 %504 = select i1 %503, float %502, float 0x3E7AD7F2A0000000 %505 = call float @llvm.AMDGPU.rsq.clamped.f32(float %504) %506 = fmul float %493, %505 %507 = fadd float %506, %28 %508 = fmul float %495, %505 %509 = fadd float %508, %29 %510 = fmul float %497, %505 %511 = fadd float %510, %30 %512 = fdiv float 1.000000e+00, %511 %513 = fsub float -0.000000e+00, %310 %514 = fmul float %507, %512 %515 = fadd float %514, %513 %516 = fsub float -0.000000e+00, %312 %517 = fmul float %509, %512 %518 = fadd float %517, %516 %519 = fdiv float 1.000000e+00, %38 %520 = fadd float %519, %341 %521 = fadd float %519, %344 %522 = fsub float -0.000000e+00, %410 %523 = fmul float %520, %521 %524 = fadd float %523, %522 %525 = fmul float %515, %515 %526 = fmul float %415, %515 %527 = fsub float -0.000000e+00, %526 %528 = fmul float %520, %518 %529 = fadd float %528, %527 %530 = fmul float %518, %529 %531 = fmul float %525, %521 %532 = fadd float %531, %530 %533 = fmul float %532, 5.000000e-01 %534 = fdiv float 1.000000e+00, %524 %535 = fmul float %534, %533 %536 = fsub float -0.000000e+00, %524 %537 = fcmp oge float %536, 0.000000e+00 %538 = sext i1 %537 to i32 %539 = bitcast i32 %538 to float %540 = bitcast float %539 to i32 %541 = icmp ne i32 %540, 0 %.169 = select i1 %541, float 1.000000e+00, float -0.000000e+00 %542 = fmul float %533, %534 %543 = fadd float %542, -1.600000e+01 %544 = fcmp oge float %543, 0.000000e+00 %545 = sext i1 %544 to i32 %546 = bitcast i32 %545 to float %547 = bitcast float %546 to i32 %548 = icmp ne i32 %547, 0 %temp20.0 = select i1 %548, float 1.000000e+00, float -0.000000e+00 %549 = fadd float %temp20.0, %.169 %550 = fmul float %535, 0xBFF7154CA0000000 %551 = call float @llvm.AMDIL.exp.(float %550) %552 = fcmp uge float %524, 0x3E7AD7F2A0000000 %553 = select i1 %552, float %524, float 0x3E7AD7F2A0000000 %554 = call float @llvm.AMDGPU.rsq.clamped.f32(float %553) %555 = fmul float %551, %554 %556 = fmul float %56, %126 %557 = fmul float %57, %126 %558 = fmul float %58, %126 %559 = fmul float %125, %53 %560 = fadd float %559, %556 %561 = fmul float %125, %54 %562 = fadd float %561, %557 %563 = fmul float %125, %55 %564 = fadd float %563, %558 %565 = fmul float %127, %59 %566 = fadd float %565, %560 %567 = fmul float %127, %60 %568 = fadd float %567, %562 %569 = fmul float %127, %61 %570 = fadd float %569, %564 %571 = fadd float %566, %62 %572 = fadd float %568, %63 %573 = fadd float %570, %64 %574 = fdiv float 1.000000e+00, %573 %575 = fmul float %574, %571 %576 = fmul float %574, %572 %577 = fmul float %575, 5.000000e-01 %578 = fadd float %577, -5.000000e-01 %579 = fmul float %576, -5.000000e-01 %580 = fadd float %579, -5.000000e-01 %581 = fmul float %341, 6.000000e+00 %582 = fadd float %581, %578 %583 = fmul float %344, 6.000000e+00 %584 = fadd float %583, %580 %585 = fmul float %524, 2.000000e+00 %586 = fadd float %585, %582 %587 = fmul float %524, 2.000000e+00 %588 = fadd float %587, %584 %589 = bitcast float %586 to i32 %590 = bitcast float %588 to i32 %591 = insertelement <2 x i32> undef, i32 %589, i32 0 %592 = insertelement <2 x i32> %591, i32 %590, i32 1 %593 = bitcast <8 x i32> %86 to <32 x i8> %594 = bitcast <4 x i32> %88 to <16 x i8> %595 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %592, <32 x i8> %593, <16 x i8> %594, i32 2) %596 = extractelement <4 x float> %595, i32 0 %597 = extractelement <4 x float> %595, i32 1 %598 = extractelement <4 x float> %595, i32 2 %599 = extractelement <4 x float> %595, i32 3 %600 = fmul float %524, 2.000000e+00 %601 = fadd float %600, %582 %602 = fmul float %524, -2.000000e+00 %603 = fadd float %602, %584 %604 = bitcast float %601 to i32 %605 = bitcast float %603 to i32 %606 = insertelement <2 x i32> undef, i32 %604, i32 0 %607 = insertelement <2 x i32> %606, i32 %605, i32 1 %608 = bitcast <8 x i32> %86 to <32 x i8> %609 = bitcast <4 x i32> %88 to <16 x i8> %610 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %607, <32 x i8> %608, <16 x i8> %609, i32 2) %611 = extractelement <4 x float> %610, i32 0 %612 = extractelement <4 x float> %610, i32 1 %613 = extractelement <4 x float> %610, i32 2 %614 = extractelement <4 x float> %610, i32 3 %615 = fmul float %611, 2.500000e-01 %616 = fmul float %612, 2.500000e-01 %617 = fmul float %613, 2.500000e-01 %618 = fmul float %614, 2.500000e-01 %619 = fmul float %596, 2.500000e-01 %620 = fadd float %619, %615 %621 = fmul float %597, 2.500000e-01 %622 = fadd float %621, %616 %623 = fmul float %598, 2.500000e-01 %624 = fadd float %623, %617 %625 = fmul float %599, 2.500000e-01 %626 = fadd float %625, %618 %627 = fmul float %524, -2.000000e+00 %628 = fadd float %627, %582 %629 = fmul float %524, 6.000000e+00 %630 = fadd float %629, %584 %631 = bitcast float %628 to i32 %632 = bitcast float %630 to i32 %633 = insertelement <2 x i32> undef, i32 %631, i32 0 %634 = insertelement <2 x i32> %633, i32 %632, i32 1 %635 = bitcast <8 x i32> %86 to <32 x i8> %636 = bitcast <4 x i32> %88 to <16 x i8> %637 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %634, <32 x i8> %635, <16 x i8> %636, i32 2) %638 = extractelement <4 x float> %637, i32 0 %639 = extractelement <4 x float> %637, i32 1 %640 = extractelement <4 x float> %637, i32 2 %641 = extractelement <4 x float> %637, i32 3 %642 = fmul float %638, 2.500000e-01 %643 = fadd float %642, %620 %644 = fmul float %639, 2.500000e-01 %645 = fadd float %644, %622 %646 = fmul float %640, 2.500000e-01 %647 = fadd float %646, %624 %648 = fmul float %641, 2.500000e-01 %649 = fadd float %648, %626 %650 = fmul float %524, -2.000000e+00 %651 = fadd float %650, %582 %652 = fmul float %524, -2.000000e+00 %653 = fadd float %652, %584 %654 = bitcast float %651 to i32 %655 = bitcast float %653 to i32 %656 = insertelement <2 x i32> undef, i32 %654, i32 0 %657 = insertelement <2 x i32> %656, i32 %655, i32 1 %658 = bitcast <8 x i32> %86 to <32 x i8> %659 = bitcast <4 x i32> %88 to <16 x i8> %660 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %657, <32 x i8> %658, <16 x i8> %659, i32 2) %661 = extractelement <4 x float> %660, i32 0 %662 = extractelement <4 x float> %660, i32 1 %663 = extractelement <4 x float> %660, i32 2 %664 = extractelement <4 x float> %660, i32 3 %665 = fmul float %661, 2.500000e-01 %666 = fadd float %665, %643 %667 = fmul float %662, 2.500000e-01 %668 = fadd float %667, %645 %669 = fmul float %663, 2.500000e-01 %670 = fadd float %669, %647 %671 = fmul float %664, 2.500000e-01 %672 = fadd float %671, %649 %673 = call float @fabs(float %666) %674 = call float @llvm.log2.f32(float %673) %675 = call float @fabs(float %668) %676 = call float @llvm.log2.f32(float %675) %677 = call float @fabs(float %670) %678 = call float @llvm.log2.f32(float %677) %679 = fmul float %674, 0x40019999A0000000 %680 = fmul float %676, 0x40019999A0000000 %681 = fmul float %678, 0x40019999A0000000 %682 = call float @llvm.AMDIL.exp.(float %679) %683 = call float @llvm.AMDIL.exp.(float %680) %684 = call float @llvm.AMDIL.exp.(float %681) %685 = fmul float %310, %368 %686 = fmul float %312, %369 %687 = fadd float %686, %685 %688 = fmul float 1.000000e+00, %370 %689 = fadd float %687, %688 %690 = fsub float -0.000000e+00, %689 %691 = fadd float %690, 1.000000e+00 %692 = fmul float %691, %691 %693 = fmul float %692, %692 %694 = fmul float %691, %693 %695 = fmul float %445, %31 %696 = fmul float %445, %32 %697 = fmul float %445, %33 %698 = fsub float -0.000000e+00, %439 %699 = fsub float -0.000000e+00, %439 %700 = fsub float -0.000000e+00, %439 %701 = fsub float -0.000000e+00, %439 %702 = fcmp oge float %698, 0.000000e+00 %703 = sext i1 %702 to i32 %704 = bitcast i32 %703 to float %705 = bitcast float %704 to i32 %706 = icmp ne i32 %705, 0 %.170 = select i1 %706, float %695, float -0.000000e+00 %707 = fcmp oge float %699, 0.000000e+00 %708 = sext i1 %707 to i32 %709 = bitcast i32 %708 to float %710 = bitcast float %709 to i32 %711 = icmp ne i32 %710, 0 %temp76.0 = select i1 %711, float %696, float -0.000000e+00 %712 = fcmp oge float %700, 0.000000e+00 %713 = sext i1 %712 to i32 %714 = bitcast i32 %713 to float %715 = bitcast float %714 to i32 %716 = icmp ne i32 %715, 0 %.171 = select i1 %716, float %697, float -0.000000e+00 %717 = fcmp oge float %701, 0.000000e+00 %718 = sext i1 %717 to i32 %719 = bitcast i32 %718 to float %720 = bitcast float %719 to i32 %721 = icmp ne i32 %720, 0 %722 = fmul float %555, %35 %723 = fmul float %555, %36 %724 = fmul float %555, %37 %725 = fmul float %722, 0x3FE99999A0000000 %726 = fmul float %723, 0x3FE99999A0000000 %727 = fmul float %724, 0x3FE99999A0000000 %728 = fsub float -0.000000e+00, %549 %729 = fsub float -0.000000e+00, %549 %730 = fsub float -0.000000e+00, %549 %731 = fcmp oge float %728, 0.000000e+00 %732 = sext i1 %731 to i32 %733 = bitcast i32 %732 to float %734 = bitcast float %733 to i32 %735 = icmp ne i32 %734, 0 %temp64.1 = select i1 %735, float %725, float -0.000000e+00 %736 = fcmp oge float %729, 0.000000e+00 %737 = sext i1 %736 to i32 %738 = bitcast i32 %737 to float %739 = bitcast float %738 to i32 %740 = icmp ne i32 %739, 0 %.172 = select i1 %740, float %726, float -0.000000e+00 %741 = fcmp oge float %730, 0.000000e+00 %742 = sext i1 %741 to i32 %743 = bitcast i32 %742 to float %744 = bitcast float %743 to i32 %745 = icmp ne i32 %744, 0 %temp40.0 = select i1 %745, float %727, float -0.000000e+00 %746 = fadd float %.170, %temp64.1 %747 = fadd float %temp76.0, %.172 %748 = fadd float %.171, %temp40.0 %749 = fmul float %746, %694 %750 = fadd float %749, %199 %751 = fmul float %747, %694 %752 = fadd float %751, %200 %753 = fmul float %748, %694 %754 = fadd float %753, %201 %755 = fmul float %199, %31 %756 = fmul float %200, %32 %757 = fmul float %201, %33 %758 = fmul float %482, %755 %759 = fmul float %482, %756 %760 = fmul float %482, %757 %761 = fsub float -0.000000e+00, %475 %762 = fsub float -0.000000e+00, %475 %763 = fsub float -0.000000e+00, %475 %764 = fsub float -0.000000e+00, %475 %765 = fcmp oge float %761, 0.000000e+00 %766 = sext i1 %765 to i32 %767 = bitcast i32 %766 to float %768 = bitcast float %767 to i32 %769 = icmp ne i32 %768, 0 %.173 = select i1 %769, float %758, float -0.000000e+00 %770 = fcmp oge float %762, 0.000000e+00 %771 = sext i1 %770 to i32 %772 = bitcast i32 %771 to float %773 = bitcast float %772 to i32 %774 = icmp ne i32 %773, 0 %temp44.1 = select i1 %774, float %759, float -0.000000e+00 %775 = fcmp oge float %763, 0.000000e+00 %776 = sext i1 %775 to i32 %777 = bitcast i32 %776 to float %778 = bitcast float %777 to i32 %779 = icmp ne i32 %778, 0 %.174 = select i1 %779, float %760, float -0.000000e+00 %780 = fcmp oge float %764, 0.000000e+00 %781 = sext i1 %780 to i32 %782 = bitcast i32 %781 to float %783 = bitcast float %782 to i32 %784 = icmp ne i32 %783, 0 %785 = fmul float %750, %353 %786 = fadd float %785, %.173 %787 = fmul float %752, %353 %788 = fadd float %787, %temp44.1 %789 = fmul float %754, %353 %790 = fadd float %789, %.174 %791 = fmul float %682, %68 %792 = fadd float %791, %786 %793 = fmul float %683, %68 %794 = fadd float %793, %788 %795 = fmul float %684, %68 %796 = fadd float %795, %790 %797 = bitcast float %123 to i32 %798 = bitcast float %124 to i32 %799 = insertelement <2 x i32> undef, i32 %797, i32 0 %800 = insertelement <2 x i32> %799, i32 %798, i32 1 %801 = bitcast <8 x i32> %90 to <32 x i8> %802 = bitcast <4 x i32> %92 to <16 x i8> %803 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %800, <32 x i8> %801, <16 x i8> %802, i32 2) %804 = extractelement <4 x float> %803, i32 3 %805 = fmul float %691, %693 %806 = fadd float %805, 5.000000e-01 %807 = fmul float %806, %792 %808 = fmul float %806, %794 %809 = fmul float %806, %796 %810 = fadd float %672, 1.000000e+00 %811 = fadd float %672, 1.000000e+00 %812 = fadd float %672, 1.000000e+00 %813 = fadd float %672, 1.000000e+00 %814 = call float @llvm.AMDIL.clamp.(float %810, float 0.000000e+00, float 1.000000e+00) %815 = call float @llvm.AMDIL.clamp.(float %811, float 0.000000e+00, float 1.000000e+00) %816 = call float @llvm.AMDIL.clamp.(float %812, float 0.000000e+00, float 1.000000e+00) %817 = call float @llvm.AMDIL.clamp.(float %813, float 0.000000e+00, float 1.000000e+00) %818 = fmul float %804, %815 %819 = call float @fabs(float %807) %820 = call float @llvm.log2.f32(float %819) %821 = call float @fabs(float %808) %822 = call float @llvm.log2.f32(float %821) %823 = call float @fabs(float %809) %824 = call float @llvm.log2.f32(float %823) %825 = fmul float %820, 0x3FDD1743E0000000 %826 = fmul float %822, 0x3FDD1743E0000000 %827 = fmul float %824, 0x3FDD1743E0000000 %828 = call float @llvm.AMDIL.exp.(float %825) %829 = call float @llvm.AMDIL.exp.(float %826) %830 = call float @llvm.AMDIL.exp.(float %827) %831 = bitcast float %828 to i32 %832 = bitcast float %829 to i32 %833 = bitcast float %830 to i32 %834 = insertelement <4 x i32> undef, i32 %831, i32 0 %835 = insertelement <4 x i32> %834, i32 %832, i32 1 %836 = insertelement <4 x i32> %835, i32 %833, i32 2 %837 = insertelement <4 x i32> %836, i32 undef, i32 3 %838 = bitcast <8 x i32> %114 to <32 x i8> %839 = bitcast <4 x i32> %116 to <16 x i8> %840 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %837, <32 x i8> %838, <16 x i8> %839, i32 3) %841 = extractelement <4 x float> %840, i32 0 %842 = extractelement <4 x float> %840, i32 1 %843 = extractelement <4 x float> %840, i32 2 %844 = fmul float %130, 1.000000e+00 %845 = fadd float %844, 0.000000e+00 %846 = fmul float %131, -1.000000e+00 %847 = fadd float %846, 1.000000e+00 %848 = bitcast float %845 to i32 %849 = bitcast float %847 to i32 %850 = insertelement <2 x i32> undef, i32 %848, i32 0 %851 = insertelement <2 x i32> %850, i32 %849, i32 1 %852 = bitcast <8 x i32> %102 to <32 x i8> %853 = bitcast <4 x i32> %104 to <16 x i8> %854 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %851, <32 x i8> %852, <16 x i8> %853, i32 2) %855 = extractelement <4 x float> %854, i32 0 %856 = extractelement <4 x float> %854, i32 1 %857 = extractelement <4 x float> %854, i32 2 %858 = extractelement <4 x float> %854, i32 3 %859 = fsub float -0.000000e+00, %858 %860 = fadd float %859, 1.000000e+00 %861 = fmul float %841, %860 %862 = fadd float %861, %855 %863 = fmul float %842, %860 %864 = fadd float %863, %856 %865 = fmul float %843, %860 %866 = fadd float %865, %857 %867 = call float @llvm.AMDGPU.lrp(float %143, float %862, float 0x3FD99999A0000000) %868 = call float @llvm.AMDGPU.lrp(float %143, float %864, float 0x3FD99999A0000000) %869 = call float @llvm.AMDGPU.lrp(float %143, float %866, float 0x3FD99999A0000000) %870 = bitcast float %867 to i32 %871 = bitcast float %868 to i32 %872 = bitcast float %869 to i32 %873 = insertelement <4 x i32> undef, i32 %870, i32 0 %874 = insertelement <4 x i32> %873, i32 %871, i32 1 %875 = insertelement <4 x i32> %874, i32 %872, i32 2 %876 = insertelement <4 x i32> %875, i32 undef, i32 3 %877 = bitcast <8 x i32> %110 to <32 x i8> %878 = bitcast <4 x i32> %112 to <16 x i8> %879 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %876, <32 x i8> %877, <16 x i8> %878, i32 3) %880 = extractelement <4 x float> %879, i32 0 %881 = extractelement <4 x float> %879, i32 1 %882 = extractelement <4 x float> %879, i32 2 %883 = fsub float -0.000000e+00, %143 %884 = fmul float %144, %883 %885 = fadd float %884, %143 %886 = call float @llvm.AMDGPU.lrp(float %885, float %880, float %867) %887 = call float @llvm.AMDGPU.lrp(float %885, float %881, float %868) %888 = call float @llvm.AMDGPU.lrp(float %885, float %882, float %869) %889 = fdiv float 1.000000e+00, %129 %890 = fmul float %889, %128 %891 = fmul float %890, 5.000000e-01 %892 = fadd float %891, 5.000000e-01 %893 = bitcast float %892 to i32 %894 = bitcast float %49 to i32 %895 = insertelement <2 x i32> undef, i32 %893, i32 0 %896 = insertelement <2 x i32> %895, i32 %894, i32 1 %897 = bitcast <8 x i32> %98 to <32 x i8> %898 = bitcast <4 x i32> %100 to <16 x i8> %899 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %896, <32 x i8> %897, <16 x i8> %898, i32 2) %900 = extractelement <4 x float> %899, i32 0 %901 = fsub float -0.000000e+00, %50 %902 = fadd float %901, %125 %903 = fsub float -0.000000e+00, %51 %904 = fadd float %903, %126 %905 = fsub float -0.000000e+00, %52 %906 = fadd float %905, %127 %907 = fmul float %902, %902 %908 = fmul float %904, %904 %909 = fadd float %908, %907 %910 = fmul float %906, %906 %911 = fadd float %909, %910 %912 = fmul float %911, %48 %913 = fmul float %906, %47 %914 = fmul float %913, 0x3FF7154CA0000000 %915 = call float @llvm.AMDIL.exp.(float %914) %916 = fsub float -0.000000e+00, %915 %917 = fadd float %916, 1.000000e+00 %918 = fmul float %917, %912 %919 = fdiv float 1.000000e+00, %906 %920 = fmul float %919, %918 %921 = fmul float %920, 0x3FF7154CA0000000 %922 = call float @llvm.AMDIL.exp.(float %921) %923 = call float @llvm.AMDIL.clamp.(float %922, float 0.000000e+00, float 1.000000e+00) %924 = fsub float -0.000000e+00, %923 %925 = fadd float %924, 1.000000e+00 %926 = fmul float %925, %900 %927 = fsub float -0.000000e+00, %886 %928 = fadd float %927, %65 %929 = fsub float -0.000000e+00, %887 %930 = fadd float %929, %66 %931 = fsub float -0.000000e+00, %888 %932 = fadd float %931, %67 %933 = fmul float %926, %928 %934 = fadd float %933, %886 %935 = fmul float %926, %930 %936 = fadd float %935, %887 %937 = fmul float %926, %932 %938 = fadd float %937, %888 %939 = call i32 @llvm.SI.packf16(float %934, float %936) %940 = bitcast i32 %939 to float %941 = call i32 @llvm.SI.packf16(float %938, float %818) %942 = bitcast i32 %941 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %940, float %942, float %940, float %942) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v10, v0, 0, 6, [m0] ; C8281800 V_INTERP_P2_F32 v10, [v10], v1, 0, 6, [m0] ; C8291801 V_INTERP_P1_F32 v13, v0, 1, 6, [m0] ; C8341900 V_INTERP_P2_F32 v13, [v13], v1, 1, 6, [m0] ; C8351901 V_SUB_F32_e32 v11, 1.000000e+00, v13 ; 08161AF2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x24 ; C0860524 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x48 ; C0C80748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[16:23], s[12:15] ; F0800F00 0064060A V_MOV_B32_e32 v2, 0x80000000 ; 7E0402FF 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_OR_B32_e32 v2, v6, v2 ; 38040506 V_CMP_GE_F32_e64 s[0:1], v2, 0.000000e+00, 0, 0 ; D00C0000 00010102 V_CNDMASK_B32_e64 v2, 0, -1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000002 0001E680 V_CMP_LT_F32_e64 s[0:1], v2, 0.000000e+00, 0, 0 ; D0020000 00010102 V_CNDMASK_B32_e64 v2, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000002 00018280 V_OR_B32_e32 v2, v2, v2 ; 38040502 V_CMP_NE_I32_e64 s[0:1], v2, 0, 0, 0 ; D10A0000 00010102 V_INTERP_P1_F32 v2, v0, 2, 5, [m0] ; C8081600 V_INTERP_P2_F32 v2, [v2], v1, 2, 5, [m0] ; C8091601 V_INTERP_P1_F32 v5, v0, 1, 5, [m0] ; C8141500 V_INTERP_P2_F32 v5, [v5], v1, 1, 5, [m0] ; C8151501 V_INTERP_P1_F32 v12, v0, 2, 4, [m0] ; C8301200 V_INTERP_P2_F32 v12, [v12], v1, 2, 4, [m0] ; C8311201 V_INTERP_P1_F32 v14, v0, 1, 4, [m0] ; C8381100 V_INTERP_P2_F32 v14, [v14], v1, 1, 4, [m0] ; C8391101 V_INTERP_P1_F32 v15, v0, 0, 4, [m0] ; C83C1000 V_INTERP_P2_F32 v15, [v15], v1, 0, 4, [m0] ; C83D1001 V_INTERP_P1_F32 v4, v0, 1, 3, [m0] ; C8100D00 V_INTERP_P2_F32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 V_INTERP_P1_F32 v3, v0, 0, 3, [m0] ; C80C0C00 V_INTERP_P2_F32 v3, [v3], v1, 0, 3, [m0] ; C80D0C01 V_INTERP_P1_F32 v46, v0, 1, 2, [m0] ; C8B80900 V_INTERP_P2_F32 v46, [v46], v1, 1, 2, [m0] ; C8B90901 V_INTERP_P1_F32 v45, v0, 0, 2, [m0] ; C8B40800 V_INTERP_P2_F32 v45, [v45], v1, 0, 2, [m0] ; C8B50801 V_INTERP_P1_F32 v53, v0, 1, 1, [m0] ; C8D40500 V_INTERP_P2_F32 v53, [v53], v1, 1, 1, [m0] ; C8D50501 V_INTERP_P1_F32 v52, v0, 0, 1, [m0] ; C8D00400 V_INTERP_P2_F32 v52, [v52], v1, 0, 1, [m0] ; C8D10401 V_INTERP_P1_F32 v56, v0, 1, 0, [m0] ; C8E00100 V_INTERP_P2_F32 v56, [v56], v1, 1, 0, [m0] ; C8E10101 V_INTERP_P1_F32 v55, v0, 0, 0, [m0] ; C8DC0000 V_INTERP_P2_F32 v55, [v55], v1, 0, 0, [m0] ; C8DD0001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x38 ; C2010938 S_BUFFER_LOAD_DWORD s3, s[8:11], 0x36 ; C2018936 S_BUFFER_LOAD_DWORD s12, s[8:11], 0x35 ; C2060935 S_BUFFER_LOAD_DWORD s13, s[8:11], 0x34 ; C2068934 S_BUFFER_LOAD_DWORD s14, s[8:11], 0x33 ; C2070933 S_BUFFER_LOAD_DWORD s15, s[8:11], 0x31 ; C2078931 S_BUFFER_LOAD_DWORD s16, s[8:11], 0x30 ; C2080930 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x2f ; C208892F S_BUFFER_LOAD_DWORD s18, s[8:11], 0x2d ; C209092D S_BUFFER_LOAD_DWORD s19, s[8:11], 0x2c ; C209892C S_BUFFER_LOAD_DWORD s20, s[8:11], 0x2b ; C20A092B S_BUFFER_LOAD_DWORD s21, s[8:11], 0x29 ; C20A8929 S_BUFFER_LOAD_DWORD s22, s[8:11], 0x28 ; C20B0928 S_BUFFER_LOAD_DWORD s23, s[8:11], 0x27 ; C20B8927 S_BUFFER_LOAD_DWORD s24, s[8:11], 0x25 ; C20C0925 S_BUFFER_LOAD_DWORD s25, s[8:11], 0x24 ; C20C8924 S_BUFFER_LOAD_DWORD s26, s[8:11], 0x22 ; C20D0922 S_BUFFER_LOAD_DWORD s27, s[8:11], 0x21 ; C20D8921 S_BUFFER_LOAD_DWORD s28, s[8:11], 0x20 ; C20E0920 S_BUFFER_LOAD_DWORD s29, s[8:11], 0x1f ; C20E891F S_BUFFER_LOAD_DWORD s30, s[8:11], 0x1d ; C20F091D S_BUFFER_LOAD_DWORD s31, s[8:11], 0x1c ; C20F891C S_BUFFER_LOAD_DWORD s32, s[8:11], 0x1b ; C210091B S_BUFFER_LOAD_DWORD s33, s[8:11], 0x1a ; C210891A S_BUFFER_LOAD_DWORD s34, s[8:11], 0x19 ; C2110919 S_BUFFER_LOAD_DWORD s35, s[8:11], 0x18 ; C2118918 S_BUFFER_LOAD_DWORD s36, s[8:11], 0x17 ; C2120917 S_BUFFER_LOAD_DWORD s37, s[8:11], 0x16 ; C2128916 S_BUFFER_LOAD_DWORD s38, s[8:11], 0x15 ; C2130915 S_BUFFER_LOAD_DWORD s39, s[8:11], 0x14 ; C2138914 S_BUFFER_LOAD_DWORD s40, s[8:11], 0x13 ; C2140913 S_BUFFER_LOAD_DWORD s41, s[8:11], 0x12 ; C2148912 S_BUFFER_LOAD_DWORD s42, s[8:11], 0x11 ; C2150911 S_BUFFER_LOAD_DWORD s43, s[8:11], 0x10 ; C2158910 S_BUFFER_LOAD_DWORD s44, s[8:11], 0xf ; C216090F S_BUFFER_LOAD_DWORD s45, s[8:11], 0xe ; C216890E S_BUFFER_LOAD_DWORD s46, s[8:11], 0xd ; C217090D S_BUFFER_LOAD_DWORD s47, s[8:11], 0xc ; C217890C S_BUFFER_LOAD_DWORD s48, s[8:11], 0xa ; C218090A S_BUFFER_LOAD_DWORD s49, s[8:11], 0x9 ; C2188909 S_BUFFER_LOAD_DWORD s50, s[8:11], 0x8 ; C2190908 S_BUFFER_LOAD_DWORD s51, s[8:11], 0x6 ; C2198906 S_BUFFER_LOAD_DWORD s52, s[8:11], 0x5 ; C21A0905 S_BUFFER_LOAD_DWORD s53, s[8:11], 0x4 ; C21A8904 S_BUFFER_LOAD_DWORD s8, s[8:11], 0x0 ; C2040900 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v24, s2 ; 7E300202 V_MOV_B32_e32 v0, s3 ; 7E000203 V_MOV_B32_e32 v18, s12 ; 7E24020C V_MOV_B32_e32 v1, s13 ; 7E02020D V_MOV_B32_e32 v32, s14 ; 7E40020E V_MOV_B32_e32 v35, s15 ; 7E46020F V_MOV_B32_e32 v29, s16 ; 7E3A0210 V_MOV_B32_e32 v34, s17 ; 7E440211 V_MOV_B32_e32 v37, s18 ; 7E4A0212 V_MOV_B32_e32 v30, s19 ; 7E3C0213 V_MOV_B32_e32 v36, s20 ; 7E480214 V_MOV_B32_e32 v39, s21 ; 7E4E0215 V_MOV_B32_e32 v31, s22 ; 7E3E0216 V_MOV_B32_e32 v38, s23 ; 7E4C0217 V_MOV_B32_e32 v40, s24 ; 7E500218 V_MOV_B32_e32 v33, s25 ; 7E420219 V_MOV_B32_e32 v19, s26 ; 7E26021A V_MOV_B32_e32 v23, s27 ; 7E2E021B V_MOV_B32_e32 v22, s28 ; 7E2C021C V_MOV_B32_e32 v17, s29 ; 7E22021D V_MOV_B32_e32 v21, s30 ; 7E2A021E V_MOV_B32_e32 v20, s31 ; 7E28021F V_MOV_B32_e32 v60, s32 ; 7E780220 V_MOV_B32_e32 v61, s33 ; 7E7A0221 V_MOV_B32_e32 v65, s34 ; 7E820222 V_MOV_B32_e32 v64, s35 ; 7E800223 V_MOV_B32_e32 v48, s36 ; 7E600224 V_MOV_B32_e32 v49, s37 ; 7E620225 V_MOV_B32_e32 v51, s38 ; 7E660226 V_MOV_B32_e32 v50, s39 ; 7E640227 V_MOV_B32_e32 v57, s40 ; 7E720228 V_MOV_B32_e32 v54, s41 ; 7E6C0229 V_MOV_B32_e32 v28, s42 ; 7E38022A V_MOV_B32_e32 v26, s43 ; 7E34022B V_MOV_B32_e32 v47, s44 ; 7E5E022C V_MOV_B32_e32 v44, s45 ; 7E58022D V_MOV_B32_e32 v27, s46 ; 7E36022E V_MOV_B32_e32 v25, s47 ; 7E32022F V_MOV_B32_e32 v62, s48 ; 7E7C0230 V_MOV_B32_e32 v58, s49 ; 7E740231 V_MOV_B32_e32 v63, s50 ; 7E7E0232 V_MOV_B32_e32 v41, s51 ; 7E520233 V_MOV_B32_e32 v42, s52 ; 7E540234 V_MOV_B32_e32 v43, s53 ; 7E560235 V_MOV_B32_e32 v59, s8 ; 7E760208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_SUB_F32_e32 v66, v23, v14 ; 08841D17 V_SUB_F32_e32 v67, v22, v15 ; 08861F16 V_MUL_F32_e32 v68, v67, v67 ; 10888743 V_MAD_F32 v68, v66, v66, v68, 0, 0 ; D2820044 05128542 V_SUB_F32_e32 v69, v19, v12 ; 088A1913 V_MAD_F32 v68, v69, v69, v68, 0, 0 ; D2820044 05128B45 V_MOV_B32_e32 v70, 1.000000e-07 ; 7E8C02FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v68, v70 ; 7C0C8D44 V_CMP_U_F32_e64 s[0:1], v68, v68, 0, 0 ; D0100000 00028944 V_CNDMASK_B32_e64 v71, 0, -1, vcc, 0, 0, 0, 0 ; D2000047 01A98280 V_CNDMASK_B32_e64 v72, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000048 00018280 V_OR_B32_e32 v71, v71, v72 ; 388E9147 V_MOV_B32_e32 v72, 0x33d6bf95 ; 7E9002FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v71, 0, 0, 0 ; D10A0000 00010147 V_CNDMASK_B32_e64 v68, v72, v68, s[0:1], 0, 0, 0, 0 ; D2000044 00028948 V_RSQ_CLAMP_F32_e32 v68, v68 ; 7E885944 V_MUL_F32_e32 v66, v68, v66 ; 10848544 V_SUB_F32_e32 v71, v66, v65 ; 088E8342 V_MAD_F32 v65, v60, v71, v65, 0, 0 ; D2820041 05068F3C V_MUL_F32_e32 v67, v68, v67 ; 10868744 V_SUB_F32_e32 v71, v67, v64 ; 088E8143 V_MAD_F32 v64, v60, v71, v64, 0, 0 ; D2820040 05028F3C V_MUL_F32_e32 v71, v64, v64 ; 108E8140 V_MAD_F32 v71, v65, v65, v71, 0, 0 ; D2820047 051E8341 V_MUL_F32_e32 v73, v68, v69 ; 10928B44 V_SUB_F32_e32 v74, v73, v61 ; 08947B49 V_MAD_F32 v60, v60, v74, v61, 0, 0 ; D282003C 04F6953C V_MAD_F32 v61, v60, v60, v71, 0, 0 ; D282003D 051E793C V_CMP_GE_F32_e32 vcc, v61, v70 ; 7C0C8D3D V_CMP_U_F32_e64 s[0:1], v61, v61, 0, 0 ; D0100000 00027B3D V_CNDMASK_B32_e64 v71, 0, -1, vcc, 0, 0, 0, 0 ; D2000047 01A98280 V_CNDMASK_B32_e64 v74, 0, -1, s[0:1], 0, 0, 0, 0 ; D200004A 00018280 V_OR_B32_e32 v71, v71, v74 ; 388E9547 V_CMP_NE_I32_e64 s[0:1], v71, 0, 0, 0 ; D10A0000 00010147 V_CNDMASK_B32_e64 v61, v72, v61, s[0:1], 0, 0, 0, 0 ; D200003D 00027B48 V_RSQ_CLAMP_F32_e32 v61, v61 ; 7E7A593D V_MAD_F32 v63, v64, v61, v63, 0, 0 ; D282003F 04FE7B40 V_MAD_F32 v60, v60, v61, v62, 0, 0 ; D282003C 04FA7B3C V_RCP_F32_e32 v60, v60 ; 7E78553C V_MUL_F32_e32 v62, v63, v60 ; 107C793F S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[74:77], 15, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[8:15], s[0:3] ; F0800F00 00024A34 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v63, v74, v74 ; 067E954A V_SUB_F32_e32 v63, 1.000000e+00, v63 ; 087E7EF2 V_RCP_F32_e32 v59, v59 ; 7E76553B V_MUL_F32_e32 v59, 3.330000e-01, v59 ; 107676FF 3EAA7EFA V_MUL_F32_e32 v63, v59, v63 ; 107E7F3B S_LOAD_DWORDX4 s[16:19], s[4:5], 0x0 ; C0880500 S_LOAD_DWORDX8 s[20:27], s[6:7], 0x0 ; C0CA0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[78:81], 15, 0, 0, 0, 0, 0, 0, 0, v[55:56], s[20:27], s[16:19] ; F0800F00 00854E37 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v64, v78, v78 ; 06809D4E V_ADD_F32_e32 v64, -1.000000e+00, v64 ; 068080F3 V_MAD_F32 v71, v59, v64, v63, 0, 0 ; D2820047 04FE813B IMAGE_SAMPLE v[82:85], 15, 0, 0, 0, 0, 0, 0, 0, v[45:46], s[8:15], s[0:3] ; F0800F00 0002522D S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v86, v82, v82 ; 06ACA552 V_ADD_F32_e32 v86, -1.000000e+00, v86 ; 06ACACF3 V_MAD_F32 v71, v59, v86, v71, 0, 0 ; D2820047 051EAD3B V_SUB_F32_e32 v62, v62, v71 ; 087C8F3E V_ADD_F32_e32 v87, v75, v75 ; 06AE974B V_ADD_F32_e32 v87, -1.000000e+00, v87 ; 06AEAEF3 V_MUL_F32_e32 v87, v59, v87 ; 10AEAF3B V_ADD_F32_e32 v88, v79, v79 ; 06B09F4F V_ADD_F32_e32 v88, -1.000000e+00, v88 ; 06B0B0F3 V_MAD_F32 v89, v59, v88, v87, 0, 0 ; D2820059 055EB13B V_ADD_F32_e32 v90, v83, v83 ; 06B4A753 V_ADD_F32_e32 v90, -1.000000e+00, v90 ; 06B4B4F3 V_MAD_F32 v89, v59, v90, v89, 0, 0 ; D2820059 0566B53B V_MUL_F32_e32 v91, v71, v89 ; 10B6B347 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v52, 1, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[8:15], s[0:3] ; F0800100 00023434 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v52, v52, -2.000000e+00, 1.000000e+00, 0, 0 ; D2820034 03C9EB34 V_MUL_F32_e32 v53, v59, v59 ; 106A773B V_MUL_F32_e32 v52, v53, v52 ; 10686935 S_LOAD_DWORDX4 s[16:19], s[4:5], 0x4 ; C0880504 S_LOAD_DWORDX8 s[20:27], s[6:7], 0x8 ; C0CA0708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v55, 1, 0, 0, 0, 0, 0, 0, 0, v[55:56], s[20:27], s[16:19] ; F0800100 00853737 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v55, v55, v55 ; 066E6F37 V_ADD_F32_e32 v55, -1.000000e+00, v55 ; 066E6EF3 V_MAD_F32 v52, v55, v53, v52, 0, 0 ; D2820034 04D26B37 V_MUL_F32_e32 v55, v59, v88 ; 106EB13B V_MUL_F32_e32 v56, v55, v63 ; 10707F37 V_MUL_F32_e32 v92, v59, v64 ; 10B8813B V_MAD_F32 v56, v92, v87, v56, 0, 0 ; D2820038 04E2AF5C V_ADD_F32_e32 v52, v52, v56 ; 06687134 IMAGE_SAMPLE v45, 1, 0, 0, 0, 0, 0, 0, 0, v[45:46], s[8:15], s[0:3] ; F0800100 00022D2D S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v45, v45, v45 ; 065A5B2D V_ADD_F32_e32 v45, -1.000000e+00, v45 ; 065A5AF3 V_MAD_F32 v45, v45, v53, v52, 0, 0 ; D282002D 04D26B2D V_MUL_F32_e32 v46, v59, v86 ; 105CAD3B V_MUL_F32_e32 v52, v55, v46 ; 10685D37 V_MUL_F32_e32 v56, v59, v90 ; 1070B53B V_MAD_F32 v52, v92, v56, v52, 0, 0 ; D2820034 04D2715C V_ADD_F32_e32 v45, v45, v52 ; 065A692D V_MUL_F32_e32 v52, v87, v46 ; 10685D57 V_MAD_F32 v52, v63, v56, v52, 0, 0 ; D2820034 04D2713F V_ADD_F32_e32 v45, v45, v52 ; 065A692D V_SUB_F32_e32 v45, v45, v91 ; 085AB72D V_ADD_F32_e32 v52, v45, v45 ; 06685B2D V_MUL_F32_e32 v86, v52, v62 ; 10AC7D34 V_MAD_F32 v58, v65, v61, v58, 0, 0 ; D282003A 04EA7B41 V_MUL_F32_e32 v58, v58, v60 ; 1074793A V_SUB_F32_e32 v58, v58, v89 ; 0874B33A V_MUL_F32_e32 v60, v53, v76 ; 10789935 V_MAD_F32 v60, v80, v53, v60, 0, 0 ; D282003C 04F26B50 V_MAD_F32 v61, v59, v64, v92, 0, 0 ; D282003D 0572813B V_MAD_F32 v60, v63, v61, v60, 0, 0 ; D282003C 04F27B3F V_MAD_F32 v60, v84, v53, v60, 0, 0 ; D282003C 04F26B54 V_MAD_F32 v60, v61, v46, v60, 0, 0 ; D282003C 04F25D3D V_MUL_F32_e32 v61, v63, v46 ; 107A5D3F V_MAD_F32 v46, v63, v46, v61, 0, 0 ; D282002E 04F65D3F V_ADD_F32_e32 v46, v60, v46 ; 065C5D3C V_MUL_F32_e32 v60, v71, v71 ; 10788F47 V_SUB_F32_e32 v46, v46, v60 ; 085C792E V_RCP_F32_e32 v57, v57 ; 7E725539 V_ADD_F32_e32 v60, v57, v46 ; 06785D39 V_MUL_F32_e32 v61, v60, v58 ; 107A753C V_SUB_F32_e32 v61, v61, v86 ; 087AAD3D V_MUL_F32_e32 v58, v58, v61 ; 10747B3A V_MUL_F32_e32 v61, v62, v62 ; 107A7D3E V_MUL_F32_e32 v62, v53, v77 ; 107C9B35 V_MAD_F32 v62, v81, v53, v62, 0, 0 ; D282003E 04FA6B51 V_MAD_F32 v55, v59, v88, v55, 0, 0 ; D2820037 04DEB13B V_MAD_F32 v59, v87, v55, v62, 0, 0 ; D282003B 04FA6F57 V_MAD_F32 v53, v85, v53, v59, 0, 0 ; D2820035 04EE6B55 V_MAD_F32 v53, v55, v56, v53, 0, 0 ; D2820035 04D67137 V_MUL_F32_e32 v55, v87, v56 ; 106E7157 V_MAD_F32 v55, v87, v56, v55, 0, 0 ; D2820037 04DE7157 V_ADD_F32_e32 v53, v53, v55 ; 066A6F35 V_MUL_F32_e32 v55, v89, v89 ; 106EB359 V_SUB_F32_e32 v53, v53, v55 ; 086A6F35 V_ADD_F32_e32 v55, v57, v53 ; 066E6B39 V_MAD_F32 v56, v61, v55, v58, 0, 0 ; D2820038 04EA6F3D V_MUL_F32_e32 v56, 5.000000e-01, v56 ; 107070F0 V_MUL_F32_e32 v55, v60, v55 ; 106E6F3C V_MUL_F32_e32 v45, v45, v45 ; 105A5B2D V_SUB_F32_e32 v55, v55, v45 ; 086E5B37 V_RCP_F32_e32 v57, v55 ; 7E725537 V_MOV_B32_e32 v58, -1.600000e+01 ; 7E7402FF C1800000 V_MAD_F32 v59, v57, v56, v58, 0, 0 ; D282003B 04EA7139 V_CMP_GE_F32_e64 s[0:1], v59, 0.000000e+00, 0, 0 ; D00C0000 0001013B V_MOV_B32_e32 v59, 0x80000000 ; 7E7602FF 80000000 V_CNDMASK_B32_e64 v60, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003C 0001E53B V_MOV_B32_e32 v61, 0x80000000 ; 7E7A02FF 80000000 V_XOR_B32_e32 v62, v55, v61 ; 3A7C7B37 V_CMP_GE_F32_e64 s[0:1], v62, 0.000000e+00, 0, 0 ; D00C0000 0001013E V_CNDMASK_B32_e64 v62, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003E 0001E53B V_ADD_F32_e32 v60, v60, v62 ; 06787D3C V_XOR_B32_e32 v60, v60, v61 ; 3A787B3C V_CMP_GE_F32_e64 s[2:3], v60, 0.000000e+00, 0, 0 ; D00C0002 0001013C V_MUL_F32_e32 v56, v57, v56 ; 10707139 V_MUL_F32_e32 v56, -1.442700e+00, v56 ; 107070FF BFB8AA65 V_EXP_F32_e32 v56, v56 ; 7E704B38 V_CMP_GE_F32_e32 vcc, v55, v70 ; 7C0C8D37 V_CMP_U_F32_e64 s[0:1], v55, v55, 0, 0 ; D0100000 00026F37 V_CNDMASK_B32_e64 v57, 0, -1, vcc, 0, 0, 0, 0 ; D2000039 01A98280 V_CNDMASK_B32_e64 v60, 0, -1, s[0:1], 0, 0, 0, 0 ; D200003C 00018280 V_OR_B32_e32 v57, v57, v60 ; 38727939 V_CMP_NE_I32_e64 s[0:1], v57, 0, 0, 0 ; D10A0000 00010139 V_CNDMASK_B32_e64 v57, v72, v55, s[0:1], 0, 0, 0, 0 ; D2000039 00026F48 V_RSQ_CLAMP_F32_e32 v57, v57 ; 7E725939 V_MUL_F32_e32 v56, v56, v57 ; 10707338 V_MUL_F32_e32 v54, v56, v54 ; 106C6D38 V_MUL_F32_e32 v54, 8.000000e-01, v54 ; 106C6CFF 3F4CCCCD V_CNDMASK_B32_e64 v54, v59, v54, s[2:3], 0, 0, 0, 0 ; D2000036 000A6D3B V_SUB_F32_e32 v57, v66, v51 ; 08726742 V_MAD_F32 v51, v48, v57, v51, 0, 0 ; D2820033 04CE7330 V_SUB_F32_e32 v57, v67, v50 ; 08726543 V_MAD_F32 v50, v48, v57, v50, 0, 0 ; D2820032 04CA7330 V_MUL_F32_e32 v57, v50, v50 ; 10726532 V_MAD_F32 v57, v51, v51, v57, 0, 0 ; D2820039 04E66733 V_SUB_F32_e32 v60, v73, v49 ; 08786349 V_MAD_F32 v48, v48, v60, v49, 0, 0 ; D2820030 04C67930 V_MAD_F32 v49, v48, v48, v57, 0, 0 ; D2820031 04E66130 V_CMP_GE_F32_e32 vcc, v49, v70 ; 7C0C8D31 V_CMP_U_F32_e64 s[0:1], v49, v49, 0, 0 ; D0100000 00026331 V_CNDMASK_B32_e64 v57, 0, -1, vcc, 0, 0, 0, 0 ; D2000039 01A98280 V_CNDMASK_B32_e64 v60, 0, -1, s[0:1], 0, 0, 0, 0 ; D200003C 00018280 V_OR_B32_e32 v57, v57, v60 ; 38727939 V_CMP_NE_I32_e64 s[0:1], v57, 0, 0, 0 ; D10A0000 00010139 V_CNDMASK_B32_e64 v49, v72, v49, s[0:1], 0, 0, 0, 0 ; D2000031 00026348 V_RSQ_CLAMP_F32_e32 v49, v49 ; 7E625931 V_MAD_F32 v50, v50, v49, v43, 0, 0 ; D2820032 04AE6332 V_MAD_F32 v48, v48, v49, v41, 0, 0 ; D2820030 04A66330 V_RCP_F32_e32 v48, v48 ; 7E605530 V_MUL_F32_e32 v50, v50, v48 ; 10646132 V_SUB_F32_e32 v50, v50, v71 ; 08648F32 V_MUL_F32_e32 v52, v50, v52 ; 10686932 V_MAD_F32 v49, v51, v49, v42, 0, 0 ; D2820031 04AA6333 V_MUL_F32_e32 v48, v49, v48 ; 10606131 V_SUB_F32_e32 v48, v48, v89 ; 0860B330 V_RCP_F32_e32 v47, v47 ; 7E5E552F V_ADD_F32_e32 v49, v47, v46 ; 06625D2F V_MUL_F32_e32 v51, v49, v48 ; 10666131 V_SUB_F32_e32 v51, v51, v52 ; 08666933 V_MUL_F32_e32 v51, v48, v51 ; 10666730 V_MUL_F32_e32 v50, v50, v50 ; 10646532 V_ADD_F32_e32 v57, v47, v53 ; 06726B2F V_MAD_F32 v51, v50, v57, v51, 0, 0 ; D2820033 04CE7332 V_MUL_F32_e32 v51, 5.000000e-01, v51 ; 106666F0 V_MUL_F32_e32 v49, v49, v57 ; 10627331 V_SUB_F32_e32 v49, v49, v45 ; 08625B31 V_RCP_F32_e32 v57, v49 ; 7E725531 V_MUL_F32_e32 v60, v57, v51 ; 10786739 V_MUL_F32_e32 v60, -1.442700e+00, v60 ; 107878FF BFB8AA65 V_EXP_F32_e32 v60, v60 ; 7E784B3C V_CMP_GE_F32_e32 vcc, v49, v70 ; 7C0C8D31 V_CMP_U_F32_e64 s[0:1], v49, v49, 0, 0 ; D0100000 00026331 V_CNDMASK_B32_e64 v61, 0, -1, vcc, 0, 0, 0, 0 ; D200003D 01A98280 V_CNDMASK_B32_e64 v62, 0, -1, s[0:1], 0, 0, 0, 0 ; D200003E 00018280 V_OR_B32_e32 v61, v61, v62 ; 387A7D3D V_CMP_NE_I32_e64 s[0:1], v61, 0, 0, 0 ; D10A0000 0001013D V_CNDMASK_B32_e64 v61, v72, v49, s[0:1], 0, 0, 0, 0 ; D200003D 00026348 V_RSQ_CLAMP_F32_e32 v61, v61 ; 7E7A593D V_MUL_F32_e32 v60, v61, v60 ; 1078793D V_MUL_F32_e32 v61, v60, v44 ; 107A593C V_MAD_F32 v51, v57, v51, v58, 0, 0 ; D2820033 04EA6739 V_CMP_GE_F32_e64 s[0:1], v51, 0.000000e+00, 0, 0 ; D00C0000 00010133 V_CNDMASK_B32_e64 v51, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000033 0001E53B V_MOV_B32_e32 v57, 0x80000000 ; 7E7202FF 80000000 V_XOR_B32_e32 v49, v49, v57 ; 3A627331 V_CMP_GE_F32_e64 s[0:1], v49, 0.000000e+00, 0, 0 ; D00C0000 00010131 V_CNDMASK_B32_e64 v49, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000031 0001E53B V_ADD_F32_e32 v49, v51, v49 ; 06626333 V_XOR_B32_e32 v49, v49, v57 ; 3A627331 V_CMP_GE_F32_e64 s[8:9], v49, 0.000000e+00, 0, 0 ; D00C0008 00010131 V_CNDMASK_B32_e64 v49, v59, v61, s[8:9], 0, 0, 0, 0 ; D2000031 00227B3B V_ADD_F32_e32 v49, v49, v54 ; 06626D31 V_MUL_F32_e32 v51, v71, v67 ; 10668747 V_MAD_F32 v51, v89, v66, v51, 0, 0 ; D2820033 04CE8559 V_MAD_F32 v51, v68, v69, v51, 0, 0 ; D2820033 04CE8B44 V_SUB_F32_e32 v51, 1.000000e+00, v51 ; 086666F2 V_MUL_F32_e32 v54, v51, v51 ; 106C6733 V_MUL_F32_e32 v54, v54, v54 ; 106C6D36 V_MUL_F32_e32 v57, v51, v54 ; 10726D33 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x18 ; C0860518 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x30 ; C0C80730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[61:63], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800700 00643D03 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v64, v63 ; 7E804F3F V_MUL_LEGACY_F32_e32 v64, 2.200000e+00, v64 ; 0E8080FF 400CCCCD V_EXP_F32_e32 v64, v64 ; 7E804B40 V_MAD_F32 v49, v49, v57, v64, 0, 0 ; D2820031 05027331 V_MOV_B32_e32 v65, 1.600000e+01 ; 7E8202FF 41800000 V_MAD_F32 v66, v47, v65, v46, 0, 0 ; D2820042 04BA832F V_MUL_F32_e32 v67, v66, v48 ; 10866142 V_SUB_F32_e32 v52, v67, v52 ; 08686943 V_MUL_F32_e32 v48, v48, v52 ; 10606930 V_MAD_F32 v47, v47, v65, v53, 0, 0 ; D282002F 04D6832F V_MAD_F32 v48, v50, v47, v48, 0, 0 ; D2820030 04C25F32 V_MUL_F32_e32 v48, 5.000000e-01, v48 ; 106060F0 V_MUL_F32_e32 v47, v66, v47 ; 105E5F42 V_SUB_F32_e32 v45, v47, v45 ; 085A5B2F V_RCP_F32_e32 v47, v45 ; 7E5E552D V_MUL_F32_e32 v50, v48, v47 ; 10645F30 V_MUL_F32_e32 v50, -1.442700e+00, v50 ; 106464FF BFB8AA65 V_EXP_F32_e32 v50, v50 ; 7E644B32 V_CMP_GE_F32_e32 vcc, v45, v70 ; 7C0C8D2D V_CMP_U_F32_e64 s[0:1], v45, v45, 0, 0 ; D0100000 00025B2D V_CNDMASK_B32_e64 v52, 0, -1, vcc, 0, 0, 0, 0 ; D2000034 01A98280 V_CNDMASK_B32_e64 v65, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000041 00018280 V_OR_B32_e32 v52, v52, v65 ; 38688334 V_CMP_NE_I32_e64 s[0:1], v52, 0, 0, 0 ; D10A0000 00010134 V_CNDMASK_B32_e64 v52, v72, v45, s[0:1], 0, 0, 0, 0 ; D2000034 00025B48 V_RSQ_CLAMP_F32_e32 v52, v52 ; 7E685934 V_MUL_F32_e32 v50, v52, v50 ; 10646534 V_MUL_F32_e32 v50, 5.000000e-03, v50 ; 106464FF 3BA3D70A V_MUL_F32_e32 v44, v64, v44 ; 10585940 V_MUL_F32_e32 v44, v50, v44 ; 10585932 V_MAD_F32 v47, v48, v47, v58, 0, 0 ; D282002F 04EA5F30 V_CMP_GE_F32_e64 s[0:1], v47, 0.000000e+00, 0, 0 ; D00C0000 0001012F V_CNDMASK_B32_e64 v47, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200002F 0001E53B V_MOV_B32_e32 v48, 0x80000000 ; 7E6002FF 80000000 V_XOR_B32_e32 v45, v45, v48 ; 3A5A612D V_CMP_GE_F32_e64 s[0:1], v45, 0.000000e+00, 0, 0 ; D00C0000 0001012D V_CNDMASK_B32_e64 v45, v59, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200002D 0001E53B V_ADD_F32_e32 v45, v47, v45 ; 065A5B2F V_XOR_B32_e32 v45, v45, v48 ; 3A5A612D V_CMP_GE_F32_e64 s[0:1], v45, 0.000000e+00, 0, 0 ; D00C0000 0001012D V_CNDMASK_B32_e64 v44, v59, v44, s[0:1], 0, 0, 0, 0 ; D200002C 0002593B V_MUL_F32_e32 v43, v43, v71 ; 10568F2B V_MAD_F32 v42, v42, v89, v43, 0, 0 ; D282002A 04AEB32A V_ADD_F32_e32 v41, v42, v41 ; 0652532A V_ADD_F32_e64 v41, v41, 0, 1, 0 ; D2060829 00010129 V_MAD_F32 v42, v49, v41, v44, 0, 0 ; D282002A 04B25331 V_MUL_F32_e32 v39, v39, v14 ; 104E1D27 V_MAD_F32 v39, v15, v40, v39, 0, 0 ; D2820027 049E510F V_MAD_F32 v37, v12, v37, v39, 0, 0 ; D2820025 049E4B0C V_ADD_F32_e32 v35, v37, v35 ; 06464725 V_MUL_F32_e32 v36, v36, v14 ; 10481D24 V_MAD_F32 v36, v15, v38, v36, 0, 0 ; D2820024 04924D0F V_MAD_F32 v34, v12, v34, v36, 0, 0 ; D2820022 0492450C V_ADD_F32_e32 v32, v34, v32 ; 06404122 V_RCP_F32_e32 v32, v32 ; 7E405520 V_MUL_F32_e32 v34, v32, v35 ; 10444720 V_MAD_F32 v34, v34, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820022 03C5E322 V_MOV_B32_e32 v35, 6.000000e+00 ; 7E4602FF 40C00000 V_MAD_F32 v34, v53, v35, v34, 0, 0 ; D2820022 048A4735 V_MAD_F32 v37, v55, -2.000000e+00, v34, 0, 0 ; D2820025 0489EB37 V_ADD_F32_e32 v38, v55, v55 ; 064C6F37 V_MUL_F32_e32 v31, v31, v14 ; 103E1D1F V_MAD_F32 v31, v15, v33, v31, 0, 0 ; D282001F 047E430F V_MAD_F32 v30, v12, v30, v31, 0, 0 ; D282001E 047E3D0C V_ADD_F32_e32 v29, v30, v29 ; 063A3B1E V_MUL_F32_e32 v29, v32, v29 ; 103A3B20 V_MAD_F32 v29, v29, 5.000000e-01, -5.000000e-01, 0, 0 ; D282001D 03C5E11D V_MAD_F32 v29, v46, v35, v29, 0, 0 ; D282001D 0476472E V_ADD_F32_e32 v36, v38, v29 ; 06483B26 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x10 ; C0860510 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x20 ; C0C80720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[16:23], s[12:15] ; F0800F00 00641E24 V_MOV_B32_e32 v39, 2.500000e-01 ; 7E4E02FF 3E800000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v40, v32, v39 ; 10504F20 V_ADD_F32_e32 v38, v38, v34 ; 064C4526 V_MOV_B32_e32 v43, v36 ; 7E560324 V_MOV_B32_e32 v44, v37 ; 7E580325 V_MOV_B32_e32 v44, v38 ; 7E580326 IMAGE_SAMPLE v[43:46], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[16:23], s[12:15] ; F0800F00 00642B2B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v38, v45, v39, v40, 0, 0 ; D2820026 04A24F2D V_MAD_F32 v35, v55, v35, v34, 0, 0 ; D2820023 048A4737 V_MAD_F32 v34, v55, -2.000000e+00, v29, 0, 0 ; D2820022 0475EB37 IMAGE_SAMPLE v[64:67], 15, 0, 0, 0, 0, 0, 0, 0, v[34:35], s[16:23], s[12:15] ; F0800F00 00644022 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v66, v39, v38, 0, 0 ; D282001D 049A4F42 V_MOV_B32_e32 v35, v37 ; 7E460325 IMAGE_SAMPLE v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[34:35], s[16:23], s[12:15] ; F0800F00 00642222 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v36, v39, v29, 0, 0 ; D282001D 04764F24 V_MOV_B32_e32 v38, 0x7fffffff ; 7E4C02FF 7FFFFFFF V_AND_B32_e32 v29, v29, v38 ; 363A4D1D V_LOG_F32_e32 v29, v29 ; 7E3A4F1D V_MUL_F32_e32 v29, 2.200000e+00, v29 ; 103A3AFF 400CCCCD V_EXP_F32_e32 v29, v29 ; 7E3A4B1D V_MAD_F32 v29, v29, v24, v42, 0, 0 ; D282001D 04AA311D V_MAD_F32 v38, v51, v54, 5.000000e-01, 0, 0 ; D2820026 03C26D33 V_MUL_F32_e32 v29, v38, v29 ; 103A3B26 V_MOV_B32_e32 v40, 0x7fffffff ; 7E5002FF 7FFFFFFF V_AND_B32_e32 v29, v29, v40 ; 363A511D V_LOG_F32_e32 v29, v29 ; 7E3A4F1D V_MUL_F32_e32 v29, 4.545450e-01, v29 ; 103A3AFF 3EE8BA1F V_EXP_F32_e32 v53, v29 ; 7E6A4B1D V_MUL_F32_e32 v28, v56, v28 ; 10383938 V_MUL_F32_e32 v28, 8.000000e-01, v28 ; 103838FF 3F4CCCCD V_CNDMASK_B32_e64 v28, v59, v28, s[2:3], 0, 0, 0, 0 ; D200001C 000A393B V_MUL_F32_e32 v29, v60, v27 ; 103A373C V_CNDMASK_B32_e64 v29, v59, v29, s[8:9], 0, 0, 0, 0 ; D200001D 00223B3B V_ADD_F32_e32 v28, v29, v28 ; 0638391D V_LOG_F32_e32 v29, v62 ; 7E3A4F3E V_MUL_LEGACY_F32_e32 v29, 2.200000e+00, v29 ; 0E3A3AFF 400CCCCD V_EXP_F32_e32 v29, v29 ; 7E3A4B1D V_MAD_F32 v28, v28, v57, v29, 0, 0 ; D282001C 0476731C V_MUL_F32_e32 v27, v29, v27 ; 1036371D V_MUL_F32_e32 v27, v50, v27 ; 10363732 V_CNDMASK_B32_e64 v27, v59, v27, s[0:1], 0, 0, 0, 0 ; D200001B 0002373B V_MAD_F32 v27, v28, v41, v27, 0, 0 ; D282001B 046E531C V_MUL_F32_e32 v28, v31, v39 ; 10384F1F V_MAD_F32 v28, v44, v39, v28, 0, 0 ; D282001C 04724F2C V_MAD_F32 v28, v65, v39, v28, 0, 0 ; D282001C 04724F41 V_MAD_F32 v28, v35, v39, v28, 0, 0 ; D282001C 04724F23 V_MOV_B32_e32 v29, 0x7fffffff ; 7E3A02FF 7FFFFFFF V_AND_B32_e32 v28, v28, v29 ; 36383B1C V_LOG_F32_e32 v28, v28 ; 7E384F1C V_MUL_F32_e32 v28, 2.200000e+00, v28 ; 103838FF 400CCCCD V_EXP_F32_e32 v28, v28 ; 7E384B1C V_MAD_F32 v27, v28, v24, v27, 0, 0 ; D282001B 046E311C V_MUL_F32_e32 v27, v38, v27 ; 10363726 V_MOV_B32_e32 v28, 0x7fffffff ; 7E3802FF 7FFFFFFF V_AND_B32_e32 v27, v27, v28 ; 3636391B V_LOG_F32_e32 v27, v27 ; 7E364F1B V_MUL_F32_e32 v27, 4.545450e-01, v27 ; 103636FF 3EE8BA1F V_EXP_F32_e32 v52, v27 ; 7E684B1B V_MUL_F32_e32 v26, v56, v26 ; 10343538 V_MUL_F32_e32 v26, 8.000000e-01, v26 ; 103434FF 3F4CCCCD V_CNDMASK_B32_e64 v26, v59, v26, s[2:3], 0, 0, 0, 0 ; D200001A 000A353B V_MUL_F32_e32 v27, v60, v25 ; 1036333C V_CNDMASK_B32_e64 v27, v59, v27, s[8:9], 0, 0, 0, 0 ; D200001B 0022373B V_ADD_F32_e32 v26, v27, v26 ; 0634351B V_LOG_F32_e32 v27, v61 ; 7E364F3D V_MUL_LEGACY_F32_e32 v27, 2.200000e+00, v27 ; 0E3636FF 400CCCCD V_EXP_F32_e32 v27, v27 ; 7E364B1B V_MAD_F32 v26, v26, v57, v27, 0, 0 ; D282001A 046E731A V_MUL_F32_e32 v25, v27, v25 ; 1032331B V_MUL_F32_e32 v25, v50, v25 ; 10323332 V_CNDMASK_B32_e64 v25, v59, v25, s[0:1], 0, 0, 0, 0 ; D2000019 0002333B V_MAD_F32 v25, v26, v41, v25, 0, 0 ; D2820019 0466531A V_MUL_F32_e32 v26, v30, v39 ; 10344F1E V_MAD_F32 v26, v43, v39, v26, 0, 0 ; D282001A 046A4F2B V_MAD_F32 v26, v64, v39, v26, 0, 0 ; D282001A 046A4F40 V_MAD_F32 v26, v34, v39, v26, 0, 0 ; D282001A 046A4F22 V_MOV_B32_e32 v27, 0x7fffffff ; 7E3602FF 7FFFFFFF V_AND_B32_e32 v26, v26, v27 ; 3634371A V_LOG_F32_e32 v26, v26 ; 7E344F1A V_MUL_F32_e32 v26, 2.200000e+00, v26 ; 103434FF 400CCCCD V_EXP_F32_e32 v26, v26 ; 7E344B1A V_MAD_F32 v24, v26, v24, v25, 0, 0 ; D2820018 0466311A V_MUL_F32_e32 v24, v38, v24 ; 10303126 V_MOV_B32_e32 v25, 0x7fffffff ; 7E3202FF 7FFFFFFF V_AND_B32_e32 v24, v24, v25 ; 36303318 V_LOG_F32_e32 v24, v24 ; 7E304F18 V_MUL_F32_e32 v24, 4.545450e-01, v24 ; 103030FF 3EE8BA1F V_EXP_F32_e32 v51, v24 ; 7E664B18 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x2c ; C080052C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x58 ; C0C40758 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[8:15], s[0:3] ; F0800700 00021833 V_SUB_F32_e32 v11, 1.000000e+00, v13 ; 08161AF2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x20 ; C0800520 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x40 ; C0C40740 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 00022F0A S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v10, 1.000000e+00, v50 ; 081464F2 V_MAD_F32 v11, v26, v10, v49, 0, 0 ; D282000B 04C6151A V_SUB_F32_e32 v13, 1.000000e+00, v6 ; 081A0CF2 V_MUL_F32_e32 v13, 4.000000e-01, v13 ; 101A1AFF 3ECCCCCD V_MAD_F32 v53, v6, v11, v13, 0, 0 ; D2820035 04361706 V_MAD_F32 v11, v25, v10, v48, 0, 0 ; D282000B 04C21519 V_MAD_F32 v52, v6, v11, v13, 0, 0 ; D2820034 04361706 V_MAD_F32 v10, v24, v10, v47, 0, 0 ; D282000A 04BE1518 V_MAD_F32 v51, v6, v10, v13, 0, 0 ; D2820033 04361506 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x28 ; C0800528 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x50 ; C0C40750 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[8:15], s[0:3] ; F0800700 00021833 V_MUL_F32_e32 v10, v8, v6 ; 10140D08 V_SUB_F32_e32 v6, v6, v10 ; 080C1506 V_SUB_F32_e32 v7, 1.000000e+00, v6 ; 080E0CF2 V_MUL_F32_e32 v8, v7, v52 ; 10106907 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v8, v6, v25, v8, 0, 0 ; D2820008 04223306 V_SUB_F32_e32 v9, v18, v8 ; 08121112 V_SUB_F32_e32 v10, v14, v23 ; 08142F0E V_SUB_F32_e32 v11, v15, v22 ; 08162D0F V_MUL_F32_e32 v11, v11, v11 ; 1016170B V_MAD_F32 v10, v10, v10, v11, 0, 0 ; D282000A 042E150A V_SUB_F32_e32 v11, v12, v19 ; 0816270C V_MAD_F32 v10, v11, v11, v10, 0, 0 ; D282000A 042A170B V_MUL_F32_e32 v10, v10, v21 ; 10142B0A V_MUL_F32_e32 v12, v11, v20 ; 1018290B V_MUL_F32_e32 v12, 1.442700e+00, v12 ; 101818FF 3FB8AA65 V_EXP_F32_e32 v12, v12 ; 7E184B0C V_SUB_F32_e32 v12, 1.000000e+00, v12 ; 081818F2 V_MUL_F32_e32 v10, v12, v10 ; 1014150C V_RCP_F32_e32 v11, v11 ; 7E16550B V_MUL_F32_e32 v10, v11, v10 ; 1014150B V_MUL_F32_e32 v10, 1.442700e+00, v10 ; 101414FF 3FB8AA65 V_EXP_F32_e32 v10, v10 ; 7E144B0A V_ADD_F32_e64 v10, v10, 0, 1, 0 ; D206080A 0001010A V_SUB_F32_e32 v10, 1.000000e+00, v10 ; 081414F2 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v2, v2, v5 ; 10040B02 V_MAD_F32 v16, v2, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820010 03C1E102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x38 ; C0C40738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[8:15], s[0:3] ; F0800100 00020210 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v10, v2 ; 1004050A V_MAD_F32 v5, v2, v9, v8, 0, 0 ; D2820005 04221302 V_MUL_F32_e32 v8, v7, v51 ; 10106707 V_MAD_F32 v8, v6, v24, v8, 0, 0 ; D2820008 04223106 V_SUB_F32_e32 v1, v1, v8 ; 08021101 V_MAD_F32 v1, v2, v1, v8, 0, 0 ; D2820001 04220302 V_CVT_PKRTZ_F16_F32_e32 v1, v1, v5 ; 5E020B01 V_MUL_F32_e32 v5, v7, v53 ; 100A6B07 V_MAD_F32 v5, v6, v26, v5, 0, 0 ; D2820005 04163506 V_SUB_F32_e32 v0, v0, v5 ; 08000B00 V_MAD_F32 v0, v2, v0, v5, 0, 0 ; D2820000 04160102 V_MUL_F32_e32 v2, v33, v39 ; 10044F21 V_MAD_F32 v2, v46, v39, v2, 0, 0 ; D2820002 040A4F2E V_MAD_F32 v2, v67, v39, v2, 0, 0 ; D2820002 040A4F43 V_MAD_F32 v2, v37, v39, v2, 0, 0 ; D2820002 040A4F25 V_ADD_F32_e32 v2, 1.000000e+00, v2 ; 060404F2 V_ADD_F32_e64 v2, v2, 0, 1, 0 ; D2060802 00010102 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x28 ; C0C20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v3, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800800 00010303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v3, v2 ; 10040503 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..13] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xyz, IN[1], IMM[0].xxxx, IMM[0].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].x, IN[0].xxxx, IMM[0].zzzz, IMM[0].zzzz 3: MOV TEMP[2].xy, CONST[4].xyxx 4: ADD TEMP[3].yz, -TEMP[2].xxyw, CONST[5].xxyw 5: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[3].yyyy, CONST[4].xxxx 6: MOV TEMP[2].x, TEMP[4].xxxx 7: MAD TEMP[5].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].zzzz 8: MAD TEMP[3].y, TEMP[5].xxxx, TEMP[3].zzzz, CONST[4].yyyy 9: MOV TEMP[2].y, TEMP[3].yyyy 10: MAD TEMP[5].z, IN[1].wwww, CONST[6].xxxx, CONST[7].xxxx 11: MOV TEMP[2].z, TEMP[5].zzzz 12: ADD TEMP[6].xyz, -TEMP[2], CONST[12] 13: MOV TEMP[6].xyz, TEMP[6].xyzx 14: MUL TEMP[1], TEMP[3].yyyy, CONST[1] 15: MAD TEMP[1], TEMP[4].xxxx, CONST[0], TEMP[1] 16: MAD TEMP[1], TEMP[5].zzzz, CONST[2], TEMP[1] 17: ADD TEMP[1], TEMP[1], CONST[3] 18: RCP TEMP[7].x, TEMP[1].wwww 19: MOV TEMP[2].w, TEMP[7].xxxx 20: MUL TEMP[7].xy, TEMP[1], TEMP[7].xxxx 21: MOV TEMP[7].xy, TEMP[7].xyxx 22: MOV TEMP[8], TEMP[1] 23: ADD TEMP[9].xy, TEMP[2], CONST[13].zwzw 24: MOV TEMP[1].xy, TEMP[9].xyxx 25: MUL TEMP[9].xy, TEMP[1], CONST[13] 26: MOV TEMP[9].xy, TEMP[9].xyxx 27: MUL TEMP[3].xy, TEMP[3].yyyy, CONST[9] 28: MOV TEMP[1].xy, TEMP[3].xyxx 29: MOV TEMP[2].xyz, TEMP[2].xyzx 30: MAD TEMP[3].xy, TEMP[4].xxxx, CONST[8], TEMP[1] 31: MOV TEMP[1].xy, TEMP[3].xyxx 32: MAD TEMP[3].xy, TEMP[5].zzzz, CONST[10], TEMP[1] 33: MOV TEMP[1].xy, TEMP[3].xyxx 34: ADD TEMP[3].xy, TEMP[1], CONST[11] 35: MOV TEMP[1].xy, TEMP[3].xyxx 36: MAD TEMP[1].xy, TEMP[1], IMM[0].zwyw, IMM[0].wwww 37: MOV TEMP[1].xy, TEMP[1].xyxx 38: MOV TEMP[0].w, IMM[1].xxxx 39: MOV TEMP[2].w, IMM[1].xxxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[7].zw, IMM[1].xxyx 42: MOV TEMP[1].zw, IMM[1].xxyx 43: MOV TEMP[9].zw, IMM[1].xxyx 44: MOV OUT[6], TEMP[9] 45: MOV OUT[1], TEMP[0] 46: MOV OUT[2], TEMP[2] 47: MOV OUT[0], TEMP[8] 48: MOV OUT[3], TEMP[6] 49: MOV OUT[4], TEMP[7] 50: MOV OUT[5], TEMP[1] 51: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0 %58 = add i32 %5, %7 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %60, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %61, 2.000000e+00 %67 = fadd float %66, -1.000000e+00 %68 = fmul float %62, 2.000000e+00 %69 = fadd float %68, -1.000000e+00 %70 = fmul float %54, 5.000000e-01 %71 = fadd float %70, 5.000000e-01 %72 = fsub float -0.000000e+00, %29 %73 = fadd float %72, %31 %74 = fsub float -0.000000e+00, %30 %75 = fadd float %74, %32 %76 = fmul float %71, %73 %77 = fadd float %76, %29 %78 = fmul float %55, 5.000000e-01 %79 = fadd float %78, 5.000000e-01 %80 = fmul float %79, %75 %81 = fadd float %80, %30 %82 = fmul float %63, %33 %83 = fadd float %82, %34 %84 = fsub float -0.000000e+00, %77 %85 = fadd float %84, %43 %86 = fsub float -0.000000e+00, %81 %87 = fadd float %86, %44 %88 = fsub float -0.000000e+00, %83 %89 = fadd float %88, %45 %90 = fmul float %81, %17 %91 = fmul float %81, %18 %92 = fmul float %81, %19 %93 = fmul float %81, %20 %94 = fmul float %77, %13 %95 = fadd float %94, %90 %96 = fmul float %77, %14 %97 = fadd float %96, %91 %98 = fmul float %77, %15 %99 = fadd float %98, %92 %100 = fmul float %77, %16 %101 = fadd float %100, %93 %102 = fmul float %83, %21 %103 = fadd float %102, %95 %104 = fmul float %83, %22 %105 = fadd float %104, %97 %106 = fmul float %83, %23 %107 = fadd float %106, %99 %108 = fmul float %83, %24 %109 = fadd float %108, %101 %110 = fadd float %103, %25 %111 = fadd float %105, %26 %112 = fadd float %107, %27 %113 = fadd float %109, %28 %114 = fdiv float 1.000000e+00, %113 %115 = fmul float %110, %114 %116 = fmul float %111, %114 %117 = fadd float %77, %48 %118 = fadd float %81, %49 %119 = fmul float %117, %46 %120 = fmul float %118, %47 %121 = fmul float %81, %37 %122 = fmul float %81, %38 %123 = fmul float %77, %35 %124 = fadd float %123, %121 %125 = fmul float %77, %36 %126 = fadd float %125, %122 %127 = fmul float %83, %39 %128 = fadd float %127, %124 %129 = fmul float %83, %40 %130 = fadd float %129, %126 %131 = fadd float %128, %41 %132 = fadd float %130, %42 %133 = fmul float %131, 5.000000e-01 %134 = fadd float %133, -5.000000e-01 %135 = fmul float %132, -5.000000e-01 %136 = fadd float %135, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %67, float %69, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %77, float %81, float %83, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %85, float %87, float %89, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %115, float %116, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %134, float %136, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %119, float %120, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %111, float %112, float %113) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v5, v3, v3 ; 060A0703 V_ADD_F32_e32 v5, -1.000000e+00, v5 ; 060A0AF3 V_ADD_F32_e32 v6, v2, v2 ; 060C0502 V_ADD_F32_e32 v6, -1.000000e+00, v6 ; 060C0CF3 V_ADD_F32_e32 v7, v1, v1 ; 060E0301 V_ADD_F32_e32 v7, -1.000000e+00, v7 ; 060E0EF3 V_MOV_B32_e32 v8, 1.000000e+00 ; 7E1002F2 EXP 15, 32, 0, 0, 0, v7, v6, v5, v8 ; F800020F 08050607 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_SUB_F32_e32 v5, s4, v5 ; 080A0A04 S_LOAD_DWORDX4 s[8:11], s[8:9], 0x0 ; C0840900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[9:12], s[8:11][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80020900 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v10, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820000 03C1E10A V_MAD_F32 v0, v0, v5, s5, 0, 0 ; D2820000 00160B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x10 ; C2028110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_SUB_F32_e32 v5, s4, v5 ; 080A0A04 V_MAD_F32 v6, v9, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820006 03C1E109 V_MAD_F32 v5, v6, v5, s5, 0, 0 ; D2820005 00160B06 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v4, v7, v6, 0, 0 ; D2820001 041A0F04 EXP 15, 33, 0, 0, 0, v5, v0, v1, v8 ; F800021F 08010005 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x31 ; C2020131 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_SUB_F32_e32 v2, s4, v0 ; 08040004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x30 ; C2020130 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v3, s4, v5 ; 08060A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x32 ; C2020132 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v4, s4, v1 ; 08080204 EXP 15, 34, 0, 0, 0, v3, v2, v4, v8 ; F800022F 08040203 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v2, s4, v0 ; 10040004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v5, s4, v2, 0, 0 ; D2820002 04080905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v1, s4, v2, 0, 0 ; D2820002 04080901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v5, s4, v3, 0, 0 ; D2820003 040C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v1, s4, v3, 0, 0 ; D2820003 040C0901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_RCP_F32_e32 v4, v3 ; 7E085503 V_MUL_F32_e32 v6, v2, v4 ; 100C0902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v0 ; 100E0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v5, s4, v7, 0, 0 ; D2820007 041C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v1, s4, v7, 0, 0 ; D2820007 041C0901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v7 ; 060E0E04 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 35, 0, 0, 0, v4, v6, v9, v8 ; F800023F 08090604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x25 ; C2020125 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v4, s4, v0 ; 10080004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v5, s4, v4, 0, 0 ; D2820004 04100905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x29 ; C2020129 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v1, s4, v4, 0, 0 ; D2820004 04100901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2d ; C202012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s4, v4 ; 06080804 V_MAD_F32 v4, v4, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820004 03C5E304 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x24 ; C2020124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v0 ; 100C0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x20 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v5, s4, v6, 0, 0 ; D2820006 04180905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x28 ; C2020128 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v1, s4, v6, 0, 0 ; D2820006 04180901 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2c ; C202012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v6 ; 060C0C04 V_MAD_F32 v6, v6, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820006 03C5E106 EXP 15, 36, 0, 0, 0, v6, v4, v9, v8 ; F800024F 08090406 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x37 ; C2020137 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_ADD_F32_e32 v4, s4, v0 ; 06080004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x35 ; C2020135 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v4 ; 10080804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x36 ; C2020136 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v5 ; 060C0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x34 ; C2020134 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v6 ; 100C0C04 EXP 15, 37, 0, 0, 0, v6, v4, v9, v8 ; F800025F 08090406 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v5, s4, v0, 0, 0 ; D2820000 04000905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v1, s4, v0, 0, 0 ; D2820000 04000901 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xe ; C200010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..16] DCL TEMP[0..28], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 2.2000, 1.0000, -1.0000, 2.0000} IMM[2] FLT32 { 0.5000, -16.0000, -1.4427, 0.0000} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 1.4427} IMM[4] FLT32 { 0.4545, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[4], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[7], 2D 3: ABS TEMP[1].x, TEMP[0] 4: MOV TEMP[2], -TEMP[1].xxxx 5: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 6: UIF TEMP[3].xxxx :0 7: MOV TEMP[3].x, IMM[0].yyyy 8: ELSE :0 9: MOV TEMP[3].x, IMM[0].wwww 10: ENDIF 11: MOV TEMP[3].x, TEMP[3].xxxx 12: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 13: UIF TEMP[4].xxxx :0 14: MOV TEMP[4].x, IMM[0].yyyy 15: ELSE :0 16: MOV TEMP[4].x, IMM[0].wwww 17: ENDIF 18: MOV TEMP[3].y, TEMP[4].xxxx 19: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 20: UIF TEMP[4].xxxx :0 21: MOV TEMP[4].x, IMM[0].yyyy 22: ELSE :0 23: MOV TEMP[4].x, IMM[0].wwww 24: ENDIF 25: MOV TEMP[3].z, TEMP[4].xxxx 26: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 27: UIF TEMP[2].xxxx :0 28: MOV TEMP[2].x, IMM[0].yyyy 29: ELSE :0 30: MOV TEMP[2].x, IMM[0].wwww 31: ENDIF 32: MOV TEMP[3].w, TEMP[2].xxxx 33: MOV TEMP[2].w, TEMP[3] 34: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 35: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 36: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 37: UIF TEMP[4].xxxx :0 38: KILL 39: ENDIF 40: MOV TEMP[3].w, IMM[0].zzzz 41: MOV TEMP[3].x, IN[0].xxxx 42: MOV TEMP[3].y, IN[0].yyyy 43: MOV TEMP[3].z, IN[0].zzzz 44: DP4 TEMP[4].x, TEMP[3], TEMP[3] 45: RSQ TEMP[4].x, TEMP[4].xxxx 46: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 47: MOV TEMP[2].xyz, TEMP[3].xyzx 48: MOV TEMP[4].w, IMM[0].zzzz 49: MOV TEMP[4].x, IN[2].xxxx 50: MOV TEMP[4].y, IN[2].yyyy 51: MOV TEMP[4].z, IN[2].zzzz 52: DP4 TEMP[5].x, TEMP[4], TEMP[4] 53: RSQ TEMP[5].x, TEMP[5].xxxx 54: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 55: ABS TEMP[5], TEMP[2] 56: ABS TEMP[6], TEMP[2] 57: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 58: MUL TEMP[6].xyw, TEMP[5].xyzz, TEMP[5].xyzz 59: ADD TEMP[7].y, TEMP[6].yyyy, TEMP[6].xxxx 60: MAD TEMP[7].y, TEMP[5].zzzz, TEMP[5].zzzz, TEMP[7].yyyy 61: RCP TEMP[7].x, TEMP[7].yyyy 62: MUL TEMP[6].xyz, TEMP[7].xxxx, TEMP[6].xyww 63: MUL TEMP[7].yw, CONST[9].xxzy, IN[1].xyzz 64: MOV TEMP[7].xy, TEMP[7].ywww 65: TEX TEMP[7], TEMP[7], SAMP[3], 2D 66: POW TEMP[8].x, TEMP[7].xxxx, IMM[1].xxxx 67: POW TEMP[8].y, TEMP[7].yyyy, IMM[1].xxxx 68: POW TEMP[8].z, TEMP[7].zzzz, IMM[1].xxxx 69: POW TEMP[8].w, TEMP[7].wwww, IMM[1].yyyy 70: MOV TEMP[7].w, TEMP[8].wwww 71: MUL TEMP[9].yw, CONST[9].xxzy, IN[1].xxzz 72: MOV TEMP[9].xy, TEMP[9].ywww 73: TEX TEMP[9], TEMP[9], SAMP[3], 2D 74: POW TEMP[10].x, TEMP[9].xxxx, IMM[1].xxxx 75: POW TEMP[10].y, TEMP[9].yyyy, IMM[1].xxxx 76: POW TEMP[10].z, TEMP[9].zzzz, IMM[1].xxxx 77: POW TEMP[10].w, TEMP[9].wwww, IMM[1].yyyy 78: MOV TEMP[9].w, TEMP[10].wwww 79: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[10] 80: MOV TEMP[9].xyz, TEMP[10].xyzx 81: MAD TEMP[8].xyz, TEMP[8], TEMP[6].xxxx, TEMP[9] 82: MOV TEMP[7].xyz, TEMP[8].xyzx 83: MUL TEMP[8].yw, CONST[4].xxxx, IN[1].xxzy 84: MOV TEMP[8].xy, TEMP[8].ywww 85: TEX TEMP[8], TEMP[8], SAMP[1], 2D 86: POW TEMP[10].x, TEMP[8].xxxx, IMM[1].xxxx 87: POW TEMP[10].y, TEMP[8].yyyy, IMM[1].xxxx 88: POW TEMP[10].z, TEMP[8].zzzz, IMM[1].xxxx 89: POW TEMP[10].w, TEMP[8].wwww, IMM[1].yyyy 90: MAD TEMP[8].xyz, TEMP[10], TEMP[6].zzzz, TEMP[7] 91: MOV TEMP[7].xyz, TEMP[8].xyzx 92: MUL TEMP[10].yw, CONST[10].xxzy, IN[1].xyzz 93: MOV TEMP[2].w, -TEMP[3].zzzz 94: DP3 TEMP[11].x, TEMP[2].wyxx, TEMP[4].xyzz 95: MOV TEMP[9].y, TEMP[11].xxxx 96: MUL TEMP[11].xyz, TEMP[2].xzyw, IMM[1].yyzw 97: DP3 TEMP[12].x, TEMP[11].zxyy, TEMP[4].xyzz 98: MOV TEMP[9].w, TEMP[12].xxxx 99: DP3 TEMP[12].x, TEMP[3].xyzz, TEMP[4].xyzz 100: MOV TEMP[9].z, TEMP[12].xxxx 101: MUL TEMP[12].xyz, TEMP[2].zyxw, IMM[1].zyyw 102: DP3 TEMP[13].x, TEMP[12].xyzz, CONST[1].xyzz 103: MOV TEMP[13].z, TEMP[13].xxxx 104: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[1].xyzz 105: MOV TEMP[13].w, TEMP[14].xxxx 106: DP3 TEMP[13].x, TEMP[3].xyzz, CONST[1].xyzz 107: DP3 TEMP[14].x, TEMP[12].xyzz, CONST[2].xyzz 108: MOV TEMP[12].y, TEMP[14].xxxx 109: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[2].xyzz 110: MOV TEMP[12].w, TEMP[14].xxxx 111: DP3 TEMP[14].x, TEMP[3].xyzz, CONST[2].xyzz 112: MOV TEMP[12].z, TEMP[14].xxxx 113: MOV TEMP[14].xy, TEMP[10].ywww 114: TEX TEMP[14].zw, TEMP[14], SAMP[5], 2D 115: MOV TEMP[15].zw, TEMP[14].wwzw 116: MOV TEMP[16].xy, TEMP[10].ywww 117: TEX TEMP[16], TEMP[16], SAMP[4], 2D 118: MOV TEMP[17].w, TEMP[16].wwww 119: MUL TEMP[18].y, CONST[3].xxxx, CONST[3].xxxx 120: MAD TEMP[19].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 121: MOV TEMP[15].xy, TEMP[19].xyxx 122: MUL TEMP[19].xy, TEMP[15], CONST[3].xxxx 123: MOV TEMP[15].xy, TEMP[19].xyxx 124: MOV TEMP[15].z, IMM[0].xxxx 125: MOV TEMP[20].w, IMM[0].zzzz 126: MOV TEMP[20].x, TEMP[19].xxxx 127: MOV TEMP[20].y, TEMP[19].yyyy 128: MOV TEMP[20].z, IMM[0].xxxx 129: DP4 TEMP[19].x, TEMP[20], TEMP[20] 130: RSQ TEMP[19].x, TEMP[19].xxxx 131: MUL TEMP[19].xyz, TEMP[20], TEMP[19].xxxx 132: MAD TEMP[14].z, TEMP[14].wwww, IMM[1].wwww, IMM[0].yyyy 133: MOV TEMP[14].z, TEMP[14].zzzz 134: MOV TEMP[14].xy, TEMP[16].zwzz 135: MUL TEMP[16].xyz, TEMP[15].xyyw, TEMP[15].xyxw 136: MOV TEMP[17].xyz, TEMP[16].xyzx 137: MAD TEMP[16].xyz, TEMP[14], TEMP[18].yyyy, -TEMP[17] 138: DP3 TEMP[20].x, TEMP[13].zwxx, TEMP[19].xyzz 139: MOV_SAT TEMP[20].x, TEMP[20].xxxx 140: DP3 TEMP[21].x, TEMP[12].ywzz, TEMP[19].xyzz 141: MOV_SAT TEMP[21].x, TEMP[21].xxxx 142: MOV TEMP[2].w, TEMP[21].xxxx 143: ADD TEMP[22].xyz, TEMP[9].ywzw, TEMP[12].ywzw 144: MOV TEMP[14].xyz, TEMP[22].xyzx 145: RCP TEMP[22].x, TEMP[22].zzzz 146: MAD TEMP[22].xy, TEMP[14], TEMP[22].xxxx, -TEMP[15] 147: RCP TEMP[23].x, CONST[6].xxxx 148: ADD TEMP[24].zw, TEMP[23].xxxx, TEMP[16].xyxy 149: MUL TEMP[25].w, TEMP[16].zzzz, TEMP[16].zzzz 150: MAD TEMP[25].w, TEMP[24].zzzz, TEMP[24].wwww, -TEMP[25].wwww 151: MUL TEMP[26].w, TEMP[22].xxxx, TEMP[22].xxxx 152: MUL TEMP[27].w, TEMP[22].yyyy, TEMP[24].wwww 153: DP2 TEMP[16].x, TEMP[22].xxxx, -TEMP[16].zzzz 154: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[27].wwww 155: MUL TEMP[16].w, TEMP[22].yyyy, TEMP[16].xxxx 156: MAD TEMP[16].w, TEMP[26].wwww, TEMP[24].wwww, TEMP[16].wwww 157: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 158: RCP TEMP[22].x, TEMP[25].wwww 159: MUL TEMP[24].z, TEMP[16].wwww, TEMP[22].xxxx 160: MOV TEMP[26].x, -TEMP[25].wwww 161: FSGE TEMP[26].x, TEMP[26].xxxx, IMM[0].zzzz 162: UIF TEMP[26].xxxx :0 163: MOV TEMP[26].x, IMM[0].xxxx 164: ELSE :0 165: MOV TEMP[26].x, IMM[0].zzzz 166: ENDIF 167: MAD TEMP[16].w, TEMP[16].wwww, TEMP[22].xxxx, IMM[2].yyyy 168: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 169: UIF TEMP[16].xxxx :0 170: MOV TEMP[16].x, IMM[0].xxxx 171: ELSE :0 172: MOV TEMP[16].x, IMM[0].zzzz 173: ENDIF 174: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[26].xxxx 175: MUL TEMP[22].w, TEMP[24].zzzz, IMM[2].zzzz 176: EX2 TEMP[22].x, TEMP[22].wwww 177: MAX TEMP[24].x, TEMP[25].wwww, IMM[2].wwww 178: RSQ TEMP[24].x, TEMP[24].xxxx 179: MUL TEMP[22].w, TEMP[24].xxxx, TEMP[22].xxxx 180: MOV TEMP[16].x, -TEMP[16].wwww 181: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 182: UIF TEMP[16].xxxx :0 183: MOV TEMP[16].x, TEMP[22].wwww 184: ELSE :0 185: MOV TEMP[16].x, IMM[0].zzzz 186: ENDIF 187: DP3 TEMP[22].x, TEMP[19].xzyy, TEMP[9].yzww 188: ADD TEMP[22].w, -TEMP[22].xxxx, IMM[0].xxxx 189: MUL TEMP[24].w, TEMP[22].wwww, TEMP[22].wwww 190: MUL TEMP[24].w, TEMP[24].wwww, TEMP[24].wwww 191: MUL TEMP[22].w, TEMP[22].wwww, TEMP[24].wwww 192: MOV TEMP[7].w, TEMP[22].wwww 193: MUL TEMP[24].xyz, TEMP[2].zyxw, IMM[1].zzyw 194: DP3 TEMP[15].x, TEMP[19].xyzz, TEMP[24].xyzz 195: DP3 TEMP[24].x, TEMP[19].yxzz, TEMP[3].xyyy 196: MOV TEMP[15].y, TEMP[24].xxxx 197: DP3 TEMP[19].x, TEMP[19].xyzz, TEMP[3].xzzz 198: MOV TEMP[15].z, TEMP[19].xxxx 199: MOV TEMP[19].xyz, TEMP[15].xyzz 200: TEX TEMP[19], TEMP[19], SAMP[0], CUBE 201: POW TEMP[24].x, TEMP[19].xxxx, IMM[1].xxxx 202: POW TEMP[24].y, TEMP[19].yyyy, IMM[1].xxxx 203: POW TEMP[24].z, TEMP[19].zzzz, IMM[1].xxxx 204: POW TEMP[24].w, TEMP[19].wwww, IMM[1].yyyy 205: MOV TEMP[15].w, TEMP[24].wwww 206: ADD TEMP[19].xyz, TEMP[20].xxxx, TEMP[24] 207: MOV TEMP[15].xyz, TEMP[19].xyzx 208: MUL TEMP[19].xyz, TEMP[7], TEMP[15] 209: MOV TEMP[15].xyz, TEMP[19].xyzx 210: MUL TEMP[16], TEMP[16].xxxx, TEMP[22].wwww 211: MOV_SAT TEMP[16], TEMP[16] 212: MAD TEMP[15].xyz, TEMP[16].wwww, TEMP[21].xxxx, TEMP[15] 213: MUL TEMP[16].xy, CONST[10], IN[1].xzzw 214: MUL TEMP[19].xyz, TEMP[2].zyxw, IMM[1].yyzw 215: DP3 TEMP[9].x, TEMP[19].zxyy, TEMP[4].xyzz 216: DP3 TEMP[20].x, TEMP[19].zxyy, CONST[1].xyzz 217: MOV TEMP[13].y, TEMP[20].xxxx 218: DP3 TEMP[12].x, TEMP[19].zxyy, CONST[2].xyzz 219: MOV TEMP[20].xy, TEMP[16].xyyy 220: TEX TEMP[20].w, TEMP[20], SAMP[5], 2D 221: MOV TEMP[14].w, TEMP[20].wwww 222: MOV TEMP[16].xy, TEMP[16].xyyy 223: TEX TEMP[16], TEMP[16], SAMP[4], 2D 224: MOV TEMP[17].zw, TEMP[16].wwzw 225: MAD TEMP[21].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 226: MOV TEMP[17].xy, TEMP[21].xyxx 227: MUL TEMP[21].xy, TEMP[17], CONST[3].xxxx 228: MOV TEMP[14].xy, TEMP[21].xyxx 229: MOV TEMP[22].w, IMM[0].zzzz 230: MOV TEMP[22].x, TEMP[21].xxxx 231: MOV TEMP[22].y, TEMP[21].yyyy 232: MOV TEMP[22].z, IMM[0].xxxx 233: DP4 TEMP[21].x, TEMP[22], TEMP[22] 234: RSQ TEMP[21].x, TEMP[21].xxxx 235: MUL TEMP[21].xyz, TEMP[22], TEMP[21].xxxx 236: MAD TEMP[20].z, TEMP[20].wwww, IMM[1].wwww, IMM[0].yyyy 237: MOV TEMP[20].z, TEMP[20].zzzz 238: MOV TEMP[20].xy, TEMP[16].zwzz 239: MUL TEMP[16].xyz, TEMP[14].xyyw, TEMP[14].xyxw 240: MOV TEMP[17].xyz, TEMP[16].xyzx 241: MAD TEMP[16].xyz, TEMP[20], TEMP[18].yyyy, -TEMP[17] 242: MOV TEMP[17].xyz, TEMP[16].xyzx 243: DP3 TEMP[20].x, TEMP[13].ywxx, TEMP[21].xyzz 244: MOV_SAT TEMP[20].x, TEMP[20].xxxx 245: DP3 TEMP[22].x, TEMP[12].xwzz, TEMP[21].xyzz 246: MOV_SAT TEMP[22].x, TEMP[22].xxxx 247: ADD TEMP[24].yzw, TEMP[9].xxwz, TEMP[12].xxwz 248: MOV TEMP[13].yzw, TEMP[24].zyzw 249: RCP TEMP[24].x, TEMP[24].wwww 250: MAD TEMP[24].yz, TEMP[13], TEMP[24].xxxx, -TEMP[14].xxyw 251: ADD TEMP[25].xy, TEMP[23].xxxx, TEMP[17] 252: MUL TEMP[26].w, TEMP[16].zzzz, TEMP[16].zzzz 253: MAD TEMP[26].w, TEMP[25].xxxx, TEMP[25].yyyy, -TEMP[26].wwww 254: MUL TEMP[27].w, TEMP[24].yyyy, TEMP[24].yyyy 255: MUL TEMP[28].w, TEMP[24].zzzz, TEMP[25].yyyy 256: DP2 TEMP[16].x, TEMP[24].yyyy, -TEMP[16].zzzz 257: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[28].wwww 258: MUL TEMP[16].w, TEMP[24].zzzz, TEMP[16].xxxx 259: MAD TEMP[16].w, TEMP[27].wwww, TEMP[25].yyyy, TEMP[16].wwww 260: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 261: RCP TEMP[24].x, TEMP[26].wwww 262: MUL TEMP[25].w, TEMP[16].wwww, TEMP[24].xxxx 263: MOV TEMP[12].w, TEMP[25].wwww 264: MOV TEMP[27].x, -TEMP[26].wwww 265: FSGE TEMP[27].x, TEMP[27].xxxx, IMM[0].zzzz 266: UIF TEMP[27].xxxx :0 267: MOV TEMP[27].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[27].x, IMM[0].zzzz 270: ENDIF 271: MAD TEMP[16].w, TEMP[16].wwww, TEMP[24].xxxx, IMM[2].yyyy 272: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 273: UIF TEMP[16].xxxx :0 274: MOV TEMP[16].x, IMM[0].xxxx 275: ELSE :0 276: MOV TEMP[16].x, IMM[0].zzzz 277: ENDIF 278: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[27].xxxx 279: MUL TEMP[24].w, TEMP[25].wwww, IMM[2].zzzz 280: EX2 TEMP[24].x, TEMP[24].wwww 281: MAX TEMP[25].x, TEMP[26].wwww, IMM[2].wwww 282: RSQ TEMP[25].x, TEMP[25].xxxx 283: MUL TEMP[24].w, TEMP[25].xxxx, TEMP[24].xxxx 284: MOV TEMP[16].x, -TEMP[16].wwww 285: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 286: UIF TEMP[16].xxxx :0 287: MOV TEMP[16].x, TEMP[24].wwww 288: ELSE :0 289: MOV TEMP[16].x, IMM[0].zzzz 290: ENDIF 291: DP3 TEMP[24].x, TEMP[21].xzyy, TEMP[9].xzww 292: ADD TEMP[24].w, -TEMP[24].xxxx, IMM[0].xxxx 293: MUL TEMP[25].w, TEMP[24].wwww, TEMP[24].wwww 294: MUL TEMP[25].w, TEMP[25].wwww, TEMP[25].wwww 295: MOV TEMP[9].w, TEMP[25].wwww 296: MUL TEMP[24].w, TEMP[24].wwww, TEMP[25].wwww 297: MUL TEMP[25].yzw, TEMP[3].xxyx, IMM[0].yyyx 298: DP3 TEMP[17].x, TEMP[21].xyzz, TEMP[25].yzww 299: DP3 TEMP[25].x, TEMP[21].yzxx, TEMP[3].xyzz 300: MOV TEMP[17].y, TEMP[25].xxxx 301: DP3 TEMP[21].x, TEMP[21].xyzz, TEMP[3].yzzz 302: MOV TEMP[17].z, TEMP[21].xxxx 303: MOV TEMP[21].xyz, TEMP[17].xyzz 304: TEX TEMP[21], TEMP[21], SAMP[0], CUBE 305: POW TEMP[25].x, TEMP[21].xxxx, IMM[1].xxxx 306: POW TEMP[25].y, TEMP[21].yyyy, IMM[1].xxxx 307: POW TEMP[25].z, TEMP[21].zzzz, IMM[1].xxxx 308: POW TEMP[25].w, TEMP[21].wwww, IMM[1].yyyy 309: MOV TEMP[17].w, TEMP[25].wwzw 310: ADD TEMP[20].yzw, TEMP[20].xxxx, TEMP[25].xxyz 311: MOV TEMP[13].yzw, TEMP[20].zyzw 312: MUL TEMP[20].yzw, TEMP[8].xxyz, TEMP[13] 313: MOV TEMP[13].yzw, TEMP[20].zyzw 314: MUL TEMP[16], TEMP[16].xxxx, TEMP[24].wwww 315: MOV_SAT TEMP[16], TEMP[16] 316: MAD TEMP[16].yzw, TEMP[16].wwww, TEMP[22].xxxx, TEMP[13] 317: MUL TEMP[20].xy, CONST[5].xxxx, IN[1] 318: DP3 TEMP[9].x, TEMP[19].xyzz, TEMP[4].xyzz 319: DP3 TEMP[21].x, TEMP[11].xyzz, TEMP[4].xyzz 320: MOV TEMP[9].y, TEMP[21].xxxx 321: DP3 TEMP[14].x, TEMP[19].xyzz, CONST[1].xyzz 322: DP3 TEMP[21].x, TEMP[11].xyzz, CONST[1].xyzz 323: MOV TEMP[14].y, TEMP[21].xxxx 324: DP3 TEMP[12].x, TEMP[19].xyzz, CONST[2].xyzz 325: DP3 TEMP[19].x, TEMP[11].xyzz, CONST[2].xyzz 326: MOV TEMP[12].y, TEMP[19].xxxx 327: MOV TEMP[19].xy, TEMP[20].xyyy 328: TEX TEMP[19], TEMP[19], SAMP[2], 2D 329: MOV TEMP[10].zw, TEMP[19].wwzw 330: MAD TEMP[20].xy, TEMP[19], IMM[1].wwww, IMM[0].yyyy 331: MOV TEMP[10].xy, TEMP[20].xyxx 332: MUL TEMP[20].xy, TEMP[10], CONST[3].xxxx 333: MOV TEMP[17].xy, TEMP[20].xyxx 334: MOV TEMP[17].z, IMM[0].xxxx 335: MOV TEMP[21].w, IMM[0].zzzz 336: MOV TEMP[21].x, TEMP[20].xxxx 337: MOV TEMP[21].y, TEMP[20].yyyy 338: MOV TEMP[21].z, IMM[0].xxxx 339: DP4 TEMP[20].x, TEMP[21], TEMP[21] 340: RSQ TEMP[20].x, TEMP[20].xxxx 341: MUL TEMP[20].xyz, TEMP[21], TEMP[20].xxxx 342: MAD TEMP[19].xyz, TEMP[19].zwww, IMM[1].yywz, IMM[0].zzyy 343: MOV TEMP[10].xyz, TEMP[19].xyzx 344: MUL TEMP[19].xyz, TEMP[17].xyyw, TEMP[17].xyxw 345: MOV TEMP[11].xyz, TEMP[19].xyzx 346: MAD TEMP[11].xyz, TEMP[10], TEMP[18].yyyy, -TEMP[11] 347: MOV TEMP[10].xyz, TEMP[11].xyzx 348: MOV TEMP[14].z, TEMP[13].xxxx 349: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[20].xyzz 350: MOV_SAT TEMP[14].x, TEMP[14].xxxx 351: DP3 TEMP[18].x, TEMP[12].xyzz, TEMP[20].xyzz 352: MOV_SAT TEMP[18].x, TEMP[18].xxxx 353: ADD TEMP[19].xyz, TEMP[9], TEMP[12] 354: MOV TEMP[12].xyz, TEMP[19].xyzx 355: RCP TEMP[19].x, TEMP[19].zzzz 356: MAD TEMP[12].xy, TEMP[12], TEMP[19].xxxx, -TEMP[17] 357: ADD TEMP[10].xy, TEMP[23].xxxx, TEMP[10] 358: MUL TEMP[17].w, TEMP[11].zzzz, TEMP[11].zzzz 359: MAD TEMP[17].w, TEMP[10].xxxx, TEMP[10].yyyy, -TEMP[17].wwww 360: MUL TEMP[19].w, TEMP[12].xxxx, TEMP[12].xxxx 361: MUL TEMP[21].w, TEMP[12].yyyy, TEMP[10].yyyy 362: DP2 TEMP[11].x, TEMP[12].xxxx, -TEMP[11].zzzz 363: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[21].wwww 364: MUL TEMP[11].w, TEMP[12].yyyy, TEMP[11].xxxx 365: MAD TEMP[10].w, TEMP[19].wwww, TEMP[10].yyyy, TEMP[11].wwww 366: MUL TEMP[10].w, TEMP[10].wwww, IMM[2].xxxx 367: RCP TEMP[11].x, TEMP[17].wwww 368: MUL TEMP[12].w, TEMP[10].wwww, TEMP[11].xxxx 369: MOV TEMP[7].w, TEMP[12].wwww 370: MOV TEMP[19].x, -TEMP[17].wwww 371: FSGE TEMP[19].x, TEMP[19].xxxx, IMM[0].zzzz 372: UIF TEMP[19].xxxx :0 373: MOV TEMP[19].x, IMM[0].xxxx 374: ELSE :0 375: MOV TEMP[19].x, IMM[0].zzzz 376: ENDIF 377: MAD TEMP[10].w, TEMP[10].wwww, TEMP[11].xxxx, IMM[2].yyyy 378: FSGE TEMP[10].x, TEMP[10].wwww, IMM[0].zzzz 379: UIF TEMP[10].xxxx :0 380: MOV TEMP[10].x, IMM[0].xxxx 381: ELSE :0 382: MOV TEMP[10].x, IMM[0].zzzz 383: ENDIF 384: ADD TEMP[10].w, TEMP[10].xxxx, TEMP[19].xxxx 385: MUL TEMP[11].w, TEMP[12].wwww, IMM[2].zzzz 386: EX2 TEMP[11].x, TEMP[11].wwww 387: MAX TEMP[12].x, TEMP[17].wwww, IMM[2].wwww 388: RSQ TEMP[12].x, TEMP[12].xxxx 389: MUL TEMP[11].w, TEMP[12].xxxx, TEMP[11].xxxx 390: MOV TEMP[10].x, -TEMP[10].wwww 391: FSGE TEMP[10].x, TEMP[10].xxxx, IMM[0].zzzz 392: UIF TEMP[10].xxxx :0 393: MOV TEMP[10].x, TEMP[11].wwww 394: ELSE :0 395: MOV TEMP[10].x, IMM[0].zzzz 396: ENDIF 397: MOV TEMP[2].w, TEMP[10].xxxx 398: DP3 TEMP[11].x, TEMP[20].xyzz, TEMP[9].xyzz 399: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 400: MUL TEMP[12].w, TEMP[11].wwww, TEMP[11].wwww 401: MUL TEMP[12].w, TEMP[12].wwww, TEMP[12].wwww 402: MUL TEMP[11].w, TEMP[11].wwww, TEMP[12].wwww 403: DP3 TEMP[9].x, TEMP[20].yzxx, TEMP[3].xxzz 404: DP3 TEMP[12].x, TEMP[20].xzyy, TEMP[3].yyzz 405: MOV TEMP[9].y, TEMP[12].xxxx 406: MUL TEMP[12].xyz, TEMP[2], IMM[1].zzyw 407: DP3 TEMP[12].x, TEMP[20].xyzz, TEMP[12].xyzz 408: MOV TEMP[9].z, TEMP[12].xxxx 409: MOV TEMP[12].xyz, TEMP[9].xyzz 410: TEX TEMP[12], TEMP[12], SAMP[0], CUBE 411: POW TEMP[17].x, TEMP[12].xxxx, IMM[1].xxxx 412: POW TEMP[17].y, TEMP[12].yyyy, IMM[1].xxxx 413: POW TEMP[17].z, TEMP[12].zzzz, IMM[1].xxxx 414: POW TEMP[17].w, TEMP[12].wwww, IMM[1].yyyy 415: MOV TEMP[9].w, TEMP[17].wwww 416: ADD TEMP[12].xyz, TEMP[14].xxxx, TEMP[17] 417: MOV TEMP[9].xyz, TEMP[12].xyzx 418: MUL TEMP[12].xyz, TEMP[7], TEMP[9] 419: MOV TEMP[9].xyz, TEMP[12].xyzx 420: MUL TEMP[10], TEMP[10].xxxx, TEMP[11].wwww 421: MOV_SAT TEMP[10], TEMP[10] 422: MAD TEMP[10].xyz, TEMP[10].yyyy, TEMP[18].xxxx, TEMP[9] 423: MOV TEMP[9].xyz, TEMP[10].xyzx 424: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[16].yzww 425: MAD TEMP[10].xyw, TEMP[15].xyzz, TEMP[6].xxxx, TEMP[10].xyzz 426: MOV TEMP[5].w, TEMP[10].xyxw 427: MAD TEMP[6].xyz, TEMP[9], TEMP[6].zzzz, TEMP[10].xyww 428: MOV TEMP[5].xyz, TEMP[6].xyzx 429: DP3 TEMP[6].x, TEMP[8].xyzz, IMM[3].xyzz 430: ADD TEMP[6].y, -TEMP[6].xxxx, IMM[0].xxxx 431: MUL TEMP[6].xyz, TEMP[6].yyyy, CONST[7] 432: MOV TEMP[7].xyz, TEMP[6].xyzx 433: MOV_SAT TEMP[6].x, TEMP[13].xxxx 434: MOV TEMP[8].xyz, TEMP[3].xyzz 435: TEX TEMP[8], TEMP[8], SAMP[0], CUBE 436: POW TEMP[9].x, TEMP[8].xxxx, IMM[1].xxxx 437: POW TEMP[9].y, TEMP[8].yyyy, IMM[1].xxxx 438: POW TEMP[9].z, TEMP[8].zzzz, IMM[1].xxxx 439: POW TEMP[9].w, TEMP[8].wwww, IMM[1].yyyy 440: MOV TEMP[2].w, TEMP[9].wwww 441: ADD TEMP[6].xyz, TEMP[9], TEMP[6].xxxx 442: MOV TEMP[2].xyz, TEMP[6].xyzx 443: MAD TEMP[6].xyz, TEMP[7], TEMP[2], TEMP[5] 444: MAD TEMP[8].x, IN[3].yyyy, IMM[2].xxxx, IMM[2].xxxx 445: MOV TEMP[5].x, TEMP[8].xxxx 446: MOV TEMP[5].y, CONST[16].wwww 447: MOV TEMP[8].xy, TEMP[5].xyyy 448: TEX TEMP[8].x, TEMP[8], SAMP[6], 2D 449: MOV TEMP[5].x, TEMP[8].xxxx 450: ADD TEMP[9].yzw, -CONST[0].xxyz, IN[1].xxyz 451: DP3 TEMP[10].x, TEMP[9].yzww, TEMP[9].yzww 452: MUL TEMP[10].y, TEMP[10].xxxx, CONST[16].yyyy 453: MUL TEMP[11].w, TEMP[9].wwww, CONST[16].xxxx 454: MUL TEMP[11].w, TEMP[11].wwww, IMM[3].wwww 455: EX2 TEMP[11].x, TEMP[11].wwww 456: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 457: MUL TEMP[10].y, TEMP[11].wwww, TEMP[10].yyyy 458: RCP TEMP[9].x, TEMP[9].wwww 459: MUL TEMP[9].y, TEMP[9].xxxx, TEMP[10].yyyy 460: MUL TEMP[9].y, TEMP[9].yyyy, IMM[3].wwww 461: EX2 TEMP[9].x, TEMP[9].yyyy 462: MOV_SAT TEMP[9].x, TEMP[9].xxxx 463: ADD TEMP[9].y, -TEMP[9].xxxx, IMM[0].xxxx 464: MUL TEMP[10].w, TEMP[9].yyyy, TEMP[8].xxxx 465: ADD TEMP[11].w, -CONST[8].xxxx, IN[1].zzzz 466: MOV TEMP[1].w, TEMP[11].wwww 467: FSGE TEMP[12].x, TEMP[11].wwww, IMM[0].zzzz 468: UIF TEMP[12].xxxx :0 469: MOV TEMP[12].x, IMM[0].xxxx 470: ELSE :0 471: MOV TEMP[12].x, IMM[0].zzzz 472: ENDIF 473: MOV TEMP[12].w, TEMP[12].xxxx 474: ADD TEMP[13].w, CONST[8].xxxx, -IN[1].zzzz 475: MOV TEMP[7].xyz, CONST[11].xyzx 476: ADD TEMP[14].xyz, -TEMP[7], CONST[12] 477: MUL TEMP[15], TEMP[13].wwww, CONST[13].xxxx 478: MOV_SAT TEMP[15], TEMP[15] 479: MAD TEMP[14].yzw, TEMP[15].yyyy, TEMP[14].xxyz, CONST[11].xxyz 480: MOV TEMP[5].yzw, TEMP[14].zyzw 481: MUL TEMP[14].yzw, TEMP[6].xxyz, TEMP[5] 482: RCP TEMP[7].x, -TEMP[4].zzzz 483: MUL TEMP[7].w, TEMP[13].wwww, TEMP[7].xxxx 484: MUL TEMP[7].xyw, TEMP[7].wwww, TEMP[4].xyzz 485: MOV TEMP[3].w, TEMP[7].xyxw 486: DP3 TEMP[3].x, TEMP[7].xyww, TEMP[7].xyww 487: MAX TEMP[7].x, TEMP[3].xxxx, IMM[2].wwww 488: RSQ TEMP[13].x, TEMP[7].xxxx 489: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[7].xxxx 490: CMP TEMP[3].x, -TEMP[7].xxxx, TEMP[13].xxxx, IMM[0].zzzz 491: MUL TEMP[7].x, -TEMP[3].xxxx, CONST[14].xxxx 492: MUL TEMP[7].x, TEMP[7].xxxx, IMM[3].wwww 493: EX2 TEMP[7].x, TEMP[7].xxxx 494: MUL TEMP[13].y, TEMP[4].zzzz, TEMP[4].zzzz 495: MUL TEMP[13].y, TEMP[13].yyyy, TEMP[13].yyyy 496: MAD TEMP[4].y, TEMP[4].zzzz, -TEMP[13].yyyy, IMM[0].xxxx 497: ADD TEMP[4].y, -TEMP[4].yyyy, IMM[0].xxxx 498: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[4].yyyy 499: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[14].yzww 500: MOV TEMP[3].xyz, TEMP[4].xyzx 501: MAD TEMP[4].y, TEMP[8].xxxx, -TEMP[9].yyyy, IMM[0].xxxx 502: MUL TEMP[4].xyz, TEMP[4].yyyy, TEMP[3] 503: MOV TEMP[3].w, IMM[0].zzzz 504: FSGE TEMP[7].x, TEMP[11].wwww, IMM[0].zzzz 505: UIF TEMP[7].xxxx :0 506: MOV TEMP[7].x, TEMP[6].xxxx 507: ELSE :0 508: MOV TEMP[7].x, TEMP[4].xxxx 509: ENDIF 510: FSGE TEMP[8].x, TEMP[11].wwww, IMM[0].zzzz 511: UIF TEMP[8].xxxx :0 512: MOV TEMP[8].x, TEMP[6].yyyy 513: ELSE :0 514: MOV TEMP[8].x, TEMP[4].yyyy 515: ENDIF 516: FSGE TEMP[9].x, TEMP[11].wwww, IMM[0].zzzz 517: UIF TEMP[9].xxxx :0 518: MOV TEMP[6].x, TEMP[6].zzzz 519: ELSE :0 520: MOV TEMP[6].x, TEMP[4].zzzz 521: ENDIF 522: FSGE TEMP[4].x, TEMP[11].wwww, IMM[0].zzzz 523: UIF TEMP[4].xxxx :0 524: MOV TEMP[4].x, TEMP[10].wwww 525: ELSE :0 526: MOV TEMP[4].x, IMM[0].zzzz 527: ENDIF 528: MOV TEMP[2].w, TEMP[4].xxxx 529: ABS TEMP[7].x, TEMP[7].xxxx 530: LG2 TEMP[3].x, TEMP[7].xxxx 531: ABS TEMP[7].x, TEMP[8].xxxx 532: LG2 TEMP[7].x, TEMP[7].xxxx 533: MOV TEMP[3].y, TEMP[7].xxxx 534: ABS TEMP[6].x, TEMP[6].xxxx 535: LG2 TEMP[6].x, TEMP[6].xxxx 536: MOV TEMP[3].z, TEMP[6].xxxx 537: MUL TEMP[6].xyz, TEMP[3], IMM[4].xxxx 538: EX2 TEMP[3].x, TEMP[6].xxxx 539: EX2 TEMP[7].x, TEMP[6].yyyy 540: MOV TEMP[3].y, TEMP[7].xxxx 541: EX2 TEMP[6].x, TEMP[6].zzzz 542: MOV TEMP[3].z, TEMP[6].xxxx 543: MOV TEMP[6].xy, IN[5].xyyy 544: TEX TEMP[6], TEMP[6], SAMP[8], 2D 545: MOV TEMP[5].w, TEMP[6].wwww 546: LRP TEMP[3].xyz, TEMP[0].xxxx, TEMP[3], TEMP[6] 547: MOV TEMP[2].xyz, TEMP[3].xyzx 548: MOV TEMP[3].xyz, TEMP[3].xyzz 549: TEX TEMP[3], TEMP[3], SAMP[9], 3D 550: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 551: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 552: MOV TEMP[5].xyz, TEMP[0].xyzx 553: ADD TEMP[0].xyz, -TEMP[5], CONST[15] 554: MOV TEMP[1].xyz, TEMP[0].xyzx 555: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[1], TEMP[5] 556: MOV TEMP[12].xyz, TEMP[0].xyzx 557: MOV OUT[0], TEMP[12] 558: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %59 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %80 = load <8 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %82 = load <4 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %84 = load <8 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %86 = load <4 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %88 = load <8 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %90 = load <4 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %92 = load <8 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %94 = load <4 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %96 = load <8 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %98 = load <4 x i32> addrspace(2)* %97, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %113 = fmul float %109, 1.000000e+00 %114 = fadd float %113, 0.000000e+00 %115 = fmul float %110, -1.000000e+00 %116 = fadd float %115, 1.000000e+00 %117 = bitcast float %114 to i32 %118 = bitcast float %116 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = bitcast <8 x i32> %88 to <32 x i8> %122 = bitcast <4 x i32> %90 to <16 x i8> %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %121, <16 x i8> %122, i32 2) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 2 %126 = call float @fabs(float %124) %127 = fsub float -0.000000e+00, %126 %128 = fsub float -0.000000e+00, %126 %129 = fsub float -0.000000e+00, %126 %130 = fsub float -0.000000e+00, %126 %131 = fcmp oge float %127, 0.000000e+00 %132 = sext i1 %131 to i32 %133 = bitcast i32 %132 to float %134 = bitcast float %133 to i32 %135 = icmp ne i32 %134, 0 %. = select i1 %135, float -1.000000e+00, float -0.000000e+00 %136 = fcmp oge float %128, 0.000000e+00 %137 = sext i1 %136 to i32 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = icmp ne i32 %139, 0 %temp16.0 = select i1 %140, float -1.000000e+00, float -0.000000e+00 %141 = fcmp oge float %129, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %142 to float %144 = bitcast float %143 to i32 %145 = icmp ne i32 %144, 0 %.170 = select i1 %145, float -1.000000e+00, float -0.000000e+00 %146 = fcmp oge float %130, 0.000000e+00 %147 = sext i1 %146 to i32 %148 = bitcast i32 %147 to float %149 = bitcast float %148 to i32 %150 = icmp ne i32 %149, 0 %temp8.0 = select i1 %150, float -1.000000e+00, float -0.000000e+00 %151 = fcmp olt float %., 0.000000e+00 %152 = sext i1 %151 to i32 %153 = fcmp olt float %temp16.0, 0.000000e+00 %154 = sext i1 %153 to i32 %155 = fcmp olt float %.170, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %152 to float %158 = bitcast i32 %154 to float %159 = bitcast i32 %156 to float %160 = bitcast float %157 to i32 %161 = bitcast float %159 to i32 %162 = or i32 %160, %161 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = bitcast float %158 to i32 %166 = or i32 %164, %165 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 br i1 %169, label %IF126, label %ENDIF125 IF126: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF125 ENDIF125: ; preds = %main_body, %IF126 %170 = fmul float %99, %99 %171 = fmul float %100, %100 %172 = fadd float %170, %171 %173 = fmul float %101, %101 %174 = fadd float %172, %173 %175 = fmul float 0.000000e+00, 0.000000e+00 %176 = fadd float %174, %175 %177 = call float @llvm.AMDGPU.rsq.clamped.f32(float %176) %178 = fmul float %99, %177 %179 = fmul float %100, %177 %180 = fmul float %101, %177 %181 = fmul float %105, %105 %182 = fmul float %106, %106 %183 = fadd float %181, %182 %184 = fmul float %107, %107 %185 = fadd float %183, %184 %186 = fmul float 0.000000e+00, 0.000000e+00 %187 = fadd float %185, %186 %188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187) %189 = fmul float %105, %188 %190 = fmul float %106, %188 %191 = fmul float %107, %188 %192 = call float @fabs(float %178) %193 = call float @fabs(float %179) %194 = call float @fabs(float %180) %195 = call float @fabs(float %temp8.0) %196 = call float @fabs(float %178) %197 = call float @fabs(float %179) %198 = call float @fabs(float %180) %199 = call float @fabs(float %temp8.0) %200 = fmul float %192, %196 %201 = fmul float %193, %197 %202 = fmul float %194, %198 %203 = fmul float %200, %200 %204 = fmul float %201, %201 %205 = fmul float %202, %202 %206 = fadd float %204, %203 %207 = fmul float %202, %202 %208 = fadd float %207, %206 %209 = fdiv float 1.000000e+00, %208 %210 = fmul float %209, %203 %211 = fmul float %209, %204 %212 = fmul float %209, %205 %213 = fmul float %41, %103 %214 = fmul float %42, %104 %215 = bitcast float %213 to i32 %216 = bitcast float %214 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = bitcast <8 x i32> %72 to <32 x i8> %220 = bitcast <4 x i32> %74 to <16 x i8> %221 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %219, <16 x i8> %220, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = call float @llvm.pow.f32(float %222, float 0x40019999A0000000) %226 = call float @llvm.pow.f32(float %223, float 0x40019999A0000000) %227 = call float @llvm.pow.f32(float %224, float 0x40019999A0000000) %228 = fmul float %41, %102 %229 = fmul float %42, %104 %230 = bitcast float %228 to i32 %231 = bitcast float %229 to i32 %232 = insertelement <2 x i32> undef, i32 %230, i32 0 %233 = insertelement <2 x i32> %232, i32 %231, i32 1 %234 = bitcast <8 x i32> %72 to <32 x i8> %235 = bitcast <4 x i32> %74 to <16 x i8> %236 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %234, <16 x i8> %235, i32 2) %237 = extractelement <4 x float> %236, i32 0 %238 = extractelement <4 x float> %236, i32 1 %239 = extractelement <4 x float> %236, i32 2 %240 = call float @llvm.pow.f32(float %237, float 0x40019999A0000000) %241 = call float @llvm.pow.f32(float %238, float 0x40019999A0000000) %242 = call float @llvm.pow.f32(float %239, float 0x40019999A0000000) %243 = fmul float %211, %240 %244 = fmul float %211, %241 %245 = fmul float %211, %242 %246 = fmul float %225, %210 %247 = fadd float %246, %243 %248 = fmul float %226, %210 %249 = fadd float %248, %244 %250 = fmul float %227, %210 %251 = fadd float %250, %245 %252 = fmul float %34, %102 %253 = fmul float %34, %103 %254 = bitcast float %252 to i32 %255 = bitcast float %253 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = bitcast <8 x i32> %64 to <32 x i8> %259 = bitcast <4 x i32> %66 to <16 x i8> %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = call float @llvm.pow.f32(float %261, float 0x40019999A0000000) %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = fmul float %265, %212 %269 = fadd float %268, %247 %270 = fmul float %266, %212 %271 = fadd float %270, %249 %272 = fmul float %267, %212 %273 = fadd float %272, %251 %274 = fmul float %43, %103 %275 = fmul float %44, %104 %276 = fsub float -0.000000e+00, %180 %277 = fmul float %276, %189 %278 = fmul float %179, %190 %279 = fadd float %278, %277 %280 = fmul float %178, %191 %281 = fadd float %279, %280 %282 = fmul float %178, 1.000000e+00 %283 = fmul float %180, 1.000000e+00 %284 = fmul float %179, -1.000000e+00 %285 = fmul float %284, %189 %286 = fmul float %282, %190 %287 = fadd float %286, %285 %288 = fmul float %283, %191 %289 = fadd float %287, %288 %290 = fmul float %178, %189 %291 = fmul float %179, %190 %292 = fadd float %291, %290 %293 = fmul float %180, %191 %294 = fadd float %292, %293 %295 = fmul float %180, -1.000000e+00 %296 = fmul float %179, 1.000000e+00 %297 = fmul float %178, 1.000000e+00 %298 = fmul float %295, %27 %299 = fmul float %296, %28 %300 = fadd float %299, %298 %301 = fmul float %297, %29 %302 = fadd float %300, %301 %303 = fmul float %284, %27 %304 = fmul float %282, %28 %305 = fadd float %304, %303 %306 = fmul float %283, %29 %307 = fadd float %305, %306 %308 = fmul float %178, %27 %309 = fmul float %179, %28 %310 = fadd float %309, %308 %311 = fmul float %180, %29 %312 = fadd float %310, %311 %313 = fmul float %295, %30 %314 = fmul float %296, %31 %315 = fadd float %314, %313 %316 = fmul float %297, %32 %317 = fadd float %315, %316 %318 = fmul float %284, %30 %319 = fmul float %282, %31 %320 = fadd float %319, %318 %321 = fmul float %283, %32 %322 = fadd float %320, %321 %323 = fmul float %178, %30 %324 = fmul float %179, %31 %325 = fadd float %324, %323 %326 = fmul float %180, %32 %327 = fadd float %325, %326 %328 = bitcast float %274 to i32 %329 = bitcast float %275 to i32 %330 = insertelement <2 x i32> undef, i32 %328, i32 0 %331 = insertelement <2 x i32> %330, i32 %329, i32 1 %332 = bitcast <8 x i32> %80 to <32 x i8> %333 = bitcast <4 x i32> %82 to <16 x i8> %334 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %331, <32 x i8> %332, <16 x i8> %333, i32 2) %335 = extractelement <4 x float> %334, i32 3 %336 = bitcast float %274 to i32 %337 = bitcast float %275 to i32 %338 = insertelement <2 x i32> undef, i32 %336, i32 0 %339 = insertelement <2 x i32> %338, i32 %337, i32 1 %340 = bitcast <8 x i32> %76 to <32 x i8> %341 = bitcast <4 x i32> %78 to <16 x i8> %342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %339, <32 x i8> %340, <16 x i8> %341, i32 2) %343 = extractelement <4 x float> %342, i32 0 %344 = extractelement <4 x float> %342, i32 1 %345 = extractelement <4 x float> %342, i32 2 %346 = extractelement <4 x float> %342, i32 3 %347 = fmul float %33, %33 %348 = fmul float %343, 2.000000e+00 %349 = fadd float %348, -1.000000e+00 %350 = fmul float %344, 2.000000e+00 %351 = fadd float %350, -1.000000e+00 %352 = fmul float %349, %33 %353 = fmul float %351, %33 %354 = fmul float %352, %352 %355 = fmul float %353, %353 %356 = fadd float %354, %355 %357 = fmul float 1.000000e+00, 1.000000e+00 %358 = fadd float %356, %357 %359 = fmul float 0.000000e+00, 0.000000e+00 %360 = fadd float %358, %359 %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360) %362 = fmul float %352, %361 %363 = fmul float %353, %361 %364 = fmul float 1.000000e+00, %361 %365 = fmul float %335, 2.000000e+00 %366 = fadd float %365, -1.000000e+00 %367 = fmul float %352, %352 %368 = fmul float %353, %353 %369 = fmul float %353, %352 %370 = fsub float -0.000000e+00, %367 %371 = fmul float %345, %347 %372 = fadd float %371, %370 %373 = fsub float -0.000000e+00, %368 %374 = fmul float %346, %347 %375 = fadd float %374, %373 %376 = fsub float -0.000000e+00, %369 %377 = fmul float %366, %347 %378 = fadd float %377, %376 %379 = fmul float %302, %362 %380 = fmul float %307, %363 %381 = fadd float %380, %379 %382 = fmul float %312, %364 %383 = fadd float %381, %382 %384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) %385 = fmul float %317, %362 %386 = fmul float %322, %363 %387 = fadd float %386, %385 %388 = fmul float %327, %364 %389 = fadd float %387, %388 %390 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %391 = fadd float %281, %317 %392 = fadd float %289, %322 %393 = fadd float %294, %327 %394 = fdiv float 1.000000e+00, %393 %395 = fsub float -0.000000e+00, %352 %396 = fmul float %391, %394 %397 = fadd float %396, %395 %398 = fsub float -0.000000e+00, %353 %399 = fmul float %392, %394 %400 = fadd float %399, %398 %401 = fdiv float 1.000000e+00, %36 %402 = fadd float %401, %372 %403 = fadd float %401, %375 %404 = fmul float %378, %378 %405 = fsub float -0.000000e+00, %404 %406 = fmul float %402, %403 %407 = fadd float %406, %405 %408 = fmul float %397, %397 %409 = fmul float %400, %403 %410 = fsub float -0.000000e+00, %378 %411 = fsub float -0.000000e+00, %378 %412 = fmul float %397, %410 %413 = fmul float %397, %411 %414 = fadd float %412, %413 %415 = fadd float %414, %409 %416 = fmul float %400, %415 %417 = fmul float %408, %403 %418 = fadd float %417, %416 %419 = fmul float %418, 5.000000e-01 %420 = fdiv float 1.000000e+00, %407 %421 = fmul float %419, %420 %422 = fsub float -0.000000e+00, %407 %423 = fcmp oge float %422, 0.000000e+00 %424 = sext i1 %423 to i32 %425 = bitcast i32 %424 to float %426 = bitcast float %425 to i32 %427 = icmp ne i32 %426, 0 %.171 = select i1 %427, float 1.000000e+00, float 0.000000e+00 %428 = fmul float %419, %420 %429 = fadd float %428, -1.600000e+01 %430 = fcmp oge float %429, 0.000000e+00 %431 = sext i1 %430 to i32 %432 = bitcast i32 %431 to float %433 = bitcast float %432 to i32 %434 = icmp ne i32 %433, 0 %temp64.0 = select i1 %434, float 1.000000e+00, float 0.000000e+00 %435 = fadd float %temp64.0, %.171 %436 = fmul float %421, 0xBFF7154CA0000000 %437 = call float @llvm.AMDIL.exp.(float %436) %438 = fcmp uge float %407, 0x3E7AD7F2A0000000 %439 = select i1 %438, float %407, float 0x3E7AD7F2A0000000 %440 = call float @llvm.AMDGPU.rsq.clamped.f32(float %439) %441 = fmul float %440, %437 %442 = fsub float -0.000000e+00, %435 %443 = fcmp oge float %442, 0.000000e+00 %444 = sext i1 %443 to i32 %445 = bitcast i32 %444 to float %446 = bitcast float %445 to i32 %447 = icmp ne i32 %446, 0 %.172 = select i1 %447, float %441, float 0.000000e+00 %448 = fmul float %362, %281 %449 = fmul float %364, %294 %450 = fadd float %449, %448 %451 = fmul float %363, %289 %452 = fadd float %450, %451 %453 = fsub float -0.000000e+00, %452 %454 = fadd float %453, 1.000000e+00 %455 = fmul float %454, %454 %456 = fmul float %455, %455 %457 = fmul float %454, %456 %458 = fmul float %180, -1.000000e+00 %459 = fmul float %179, -1.000000e+00 %460 = fmul float %178, 1.000000e+00 %461 = fmul float %362, %458 %462 = fmul float %363, %459 %463 = fadd float %462, %461 %464 = fmul float %364, %460 %465 = fadd float %463, %464 %466 = fmul float %363, %178 %467 = fmul float %362, %179 %468 = fadd float %467, %466 %469 = fmul float %364, %179 %470 = fadd float %468, %469 %471 = fmul float %362, %178 %472 = fmul float %363, %180 %473 = fadd float %472, %471 %474 = fmul float %364, %180 %475 = fadd float %473, %474 %476 = insertelement <4 x float> undef, float %465, i32 0 %477 = insertelement <4 x float> %476, float %470, i32 1 %478 = insertelement <4 x float> %477, float %475, i32 2 %479 = insertelement <4 x float> %478, float 0.000000e+00, i32 3 %480 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %479) %481 = extractelement <4 x float> %480, i32 0 %482 = extractelement <4 x float> %480, i32 1 %483 = extractelement <4 x float> %480, i32 2 %484 = extractelement <4 x float> %480, i32 3 %485 = call float @fabs(float %483) %486 = fdiv float 1.000000e+00, %485 %487 = fmul float %481, %486 %488 = fadd float %487, 1.500000e+00 %489 = fmul float %482, %486 %490 = fadd float %489, 1.500000e+00 %491 = bitcast float %490 to i32 %492 = bitcast float %488 to i32 %493 = bitcast float %484 to i32 %494 = insertelement <4 x i32> undef, i32 %491, i32 0 %495 = insertelement <4 x i32> %494, i32 %492, i32 1 %496 = insertelement <4 x i32> %495, i32 %493, i32 2 %497 = insertelement <4 x i32> %496, i32 undef, i32 3 %498 = bitcast <8 x i32> %60 to <32 x i8> %499 = bitcast <4 x i32> %62 to <16 x i8> %500 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %497, <32 x i8> %498, <16 x i8> %499, i32 4) %501 = extractelement <4 x float> %500, i32 0 %502 = extractelement <4 x float> %500, i32 1 %503 = extractelement <4 x float> %500, i32 2 %504 = call float @llvm.pow.f32(float %501, float 0x40019999A0000000) %505 = call float @llvm.pow.f32(float %502, float 0x40019999A0000000) %506 = call float @llvm.pow.f32(float %503, float 0x40019999A0000000) %507 = fadd float %384, %504 %508 = fadd float %384, %505 %509 = fadd float %384, %506 %510 = fmul float %269, %507 %511 = fmul float %271, %508 %512 = fmul float %273, %509 %513 = fmul float %.172, %457 %514 = fmul float %.172, %457 %515 = fmul float %.172, %457 %516 = fmul float %.172, %457 %517 = call float @llvm.AMDIL.clamp.(float %513, float 0.000000e+00, float 1.000000e+00) %518 = call float @llvm.AMDIL.clamp.(float %514, float 0.000000e+00, float 1.000000e+00) %519 = call float @llvm.AMDIL.clamp.(float %515, float 0.000000e+00, float 1.000000e+00) %520 = call float @llvm.AMDIL.clamp.(float %516, float 0.000000e+00, float 1.000000e+00) %521 = fmul float %520, %390 %522 = fadd float %521, %510 %523 = fmul float %520, %390 %524 = fadd float %523, %511 %525 = fmul float %520, %390 %526 = fadd float %525, %512 %527 = fmul float %43, %102 %528 = fmul float %44, %104 %529 = fmul float %180, 1.000000e+00 %530 = fmul float %179, 1.000000e+00 %531 = fmul float %178, -1.000000e+00 %532 = fmul float %531, %189 %533 = fmul float %529, %190 %534 = fadd float %533, %532 %535 = fmul float %530, %191 %536 = fadd float %534, %535 %537 = fmul float %531, %27 %538 = fmul float %529, %28 %539 = fadd float %538, %537 %540 = fmul float %530, %29 %541 = fadd float %539, %540 %542 = fmul float %531, %30 %543 = fmul float %529, %31 %544 = fadd float %543, %542 %545 = fmul float %530, %32 %546 = fadd float %544, %545 %547 = bitcast float %527 to i32 %548 = bitcast float %528 to i32 %549 = insertelement <2 x i32> undef, i32 %547, i32 0 %550 = insertelement <2 x i32> %549, i32 %548, i32 1 %551 = bitcast <8 x i32> %80 to <32 x i8> %552 = bitcast <4 x i32> %82 to <16 x i8> %553 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %550, <32 x i8> %551, <16 x i8> %552, i32 2) %554 = extractelement <4 x float> %553, i32 3 %555 = bitcast float %527 to i32 %556 = bitcast float %528 to i32 %557 = insertelement <2 x i32> undef, i32 %555, i32 0 %558 = insertelement <2 x i32> %557, i32 %556, i32 1 %559 = bitcast <8 x i32> %76 to <32 x i8> %560 = bitcast <4 x i32> %78 to <16 x i8> %561 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %558, <32 x i8> %559, <16 x i8> %560, i32 2) %562 = extractelement <4 x float> %561, i32 0 %563 = extractelement <4 x float> %561, i32 1 %564 = extractelement <4 x float> %561, i32 2 %565 = extractelement <4 x float> %561, i32 3 %566 = fmul float %562, 2.000000e+00 %567 = fadd float %566, -1.000000e+00 %568 = fmul float %563, 2.000000e+00 %569 = fadd float %568, -1.000000e+00 %570 = fmul float %567, %33 %571 = fmul float %569, %33 %572 = fmul float %570, %570 %573 = fmul float %571, %571 %574 = fadd float %572, %573 %575 = fmul float 1.000000e+00, 1.000000e+00 %576 = fadd float %574, %575 %577 = fmul float 0.000000e+00, 0.000000e+00 %578 = fadd float %576, %577 %579 = call float @llvm.AMDGPU.rsq.clamped.f32(float %578) %580 = fmul float %570, %579 %581 = fmul float %571, %579 %582 = fmul float 1.000000e+00, %579 %583 = fmul float %554, 2.000000e+00 %584 = fadd float %583, -1.000000e+00 %585 = fmul float %570, %570 %586 = fmul float %571, %571 %587 = fmul float %571, %570 %588 = fsub float -0.000000e+00, %585 %589 = fmul float %564, %347 %590 = fadd float %589, %588 %591 = fsub float -0.000000e+00, %586 %592 = fmul float %565, %347 %593 = fadd float %592, %591 %594 = fsub float -0.000000e+00, %587 %595 = fmul float %584, %347 %596 = fadd float %595, %594 %597 = fmul float %541, %580 %598 = fmul float %307, %581 %599 = fadd float %598, %597 %600 = fmul float %312, %582 %601 = fadd float %599, %600 %602 = call float @llvm.AMDIL.clamp.(float %601, float 0.000000e+00, float 1.000000e+00) %603 = fmul float %546, %580 %604 = fmul float %322, %581 %605 = fadd float %604, %603 %606 = fmul float %327, %582 %607 = fadd float %605, %606 %608 = call float @llvm.AMDIL.clamp.(float %607, float 0.000000e+00, float 1.000000e+00) %609 = fadd float %536, %546 %610 = fadd float %289, %322 %611 = fadd float %294, %327 %612 = fdiv float 1.000000e+00, %611 %613 = fsub float -0.000000e+00, %570 %614 = fmul float %609, %612 %615 = fadd float %614, %613 %616 = fsub float -0.000000e+00, %571 %617 = fmul float %610, %612 %618 = fadd float %617, %616 %619 = fadd float %401, %590 %620 = fadd float %401, %593 %621 = fmul float %596, %596 %622 = fsub float -0.000000e+00, %621 %623 = fmul float %619, %620 %624 = fadd float %623, %622 %625 = fmul float %615, %615 %626 = fmul float %618, %620 %627 = fsub float -0.000000e+00, %596 %628 = fsub float -0.000000e+00, %596 %629 = fmul float %615, %627 %630 = fmul float %615, %628 %631 = fadd float %629, %630 %632 = fadd float %631, %626 %633 = fmul float %618, %632 %634 = fmul float %625, %620 %635 = fadd float %634, %633 %636 = fmul float %635, 5.000000e-01 %637 = fdiv float 1.000000e+00, %624 %638 = fmul float %636, %637 %639 = fsub float -0.000000e+00, %624 %640 = fcmp oge float %639, 0.000000e+00 %641 = sext i1 %640 to i32 %642 = bitcast i32 %641 to float %643 = bitcast float %642 to i32 %644 = icmp ne i32 %643, 0 %temp108.0 = select i1 %644, float 1.000000e+00, float 0.000000e+00 %645 = fmul float %636, %637 %646 = fadd float %645, -1.600000e+01 %647 = fcmp oge float %646, 0.000000e+00 %648 = sext i1 %647 to i32 %649 = bitcast i32 %648 to float %650 = bitcast float %649 to i32 %651 = icmp ne i32 %650, 0 %.173 = select i1 %651, float 1.000000e+00, float 0.000000e+00 %652 = fadd float %.173, %temp108.0 %653 = fmul float %638, 0xBFF7154CA0000000 %654 = call float @llvm.AMDIL.exp.(float %653) %655 = fcmp uge float %624, 0x3E7AD7F2A0000000 %656 = select i1 %655, float %624, float 0x3E7AD7F2A0000000 %657 = call float @llvm.AMDGPU.rsq.clamped.f32(float %656) %658 = fmul float %657, %654 %659 = fsub float -0.000000e+00, %652 %660 = fcmp oge float %659, 0.000000e+00 %661 = sext i1 %660 to i32 %662 = bitcast i32 %661 to float %663 = bitcast float %662 to i32 %664 = icmp ne i32 %663, 0 %temp64.3 = select i1 %664, float %658, float 0.000000e+00 %665 = fmul float %580, %536 %666 = fmul float %582, %294 %667 = fadd float %666, %665 %668 = fmul float %581, %289 %669 = fadd float %667, %668 %670 = fsub float -0.000000e+00, %669 %671 = fadd float %670, 1.000000e+00 %672 = fmul float %671, %671 %673 = fmul float %672, %672 %674 = fmul float %671, %673 %675 = fmul float %178, -1.000000e+00 %676 = fmul float %179, -1.000000e+00 %677 = fmul float %178, 1.000000e+00 %678 = fmul float %580, %675 %679 = fmul float %581, %676 %680 = fadd float %679, %678 %681 = fmul float %582, %677 %682 = fadd float %680, %681 %683 = fmul float %581, %178 %684 = fmul float %582, %179 %685 = fadd float %684, %683 %686 = fmul float %580, %180 %687 = fadd float %685, %686 %688 = fmul float %580, %179 %689 = fmul float %581, %180 %690 = fadd float %689, %688 %691 = fmul float %582, %180 %692 = fadd float %690, %691 %693 = insertelement <4 x float> undef, float %682, i32 0 %694 = insertelement <4 x float> %693, float %687, i32 1 %695 = insertelement <4 x float> %694, float %692, i32 2 %696 = insertelement <4 x float> %695, float 0.000000e+00, i32 3 %697 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %696) %698 = extractelement <4 x float> %697, i32 0 %699 = extractelement <4 x float> %697, i32 1 %700 = extractelement <4 x float> %697, i32 2 %701 = extractelement <4 x float> %697, i32 3 %702 = call float @fabs(float %700) %703 = fdiv float 1.000000e+00, %702 %704 = fmul float %698, %703 %705 = fadd float %704, 1.500000e+00 %706 = fmul float %699, %703 %707 = fadd float %706, 1.500000e+00 %708 = bitcast float %707 to i32 %709 = bitcast float %705 to i32 %710 = bitcast float %701 to i32 %711 = insertelement <4 x i32> undef, i32 %708, i32 0 %712 = insertelement <4 x i32> %711, i32 %709, i32 1 %713 = insertelement <4 x i32> %712, i32 %710, i32 2 %714 = insertelement <4 x i32> %713, i32 undef, i32 3 %715 = bitcast <8 x i32> %60 to <32 x i8> %716 = bitcast <4 x i32> %62 to <16 x i8> %717 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %714, <32 x i8> %715, <16 x i8> %716, i32 4) %718 = extractelement <4 x float> %717, i32 0 %719 = extractelement <4 x float> %717, i32 1 %720 = extractelement <4 x float> %717, i32 2 %721 = call float @llvm.pow.f32(float %718, float 0x40019999A0000000) %722 = call float @llvm.pow.f32(float %719, float 0x40019999A0000000) %723 = call float @llvm.pow.f32(float %720, float 0x40019999A0000000) %724 = fadd float %602, %721 %725 = fadd float %602, %722 %726 = fadd float %602, %723 %727 = fmul float %269, %724 %728 = fmul float %271, %725 %729 = fmul float %273, %726 %730 = fmul float %temp64.3, %674 %731 = fmul float %temp64.3, %674 %732 = fmul float %temp64.3, %674 %733 = fmul float %temp64.3, %674 %734 = call float @llvm.AMDIL.clamp.(float %730, float 0.000000e+00, float 1.000000e+00) %735 = call float @llvm.AMDIL.clamp.(float %731, float 0.000000e+00, float 1.000000e+00) %736 = call float @llvm.AMDIL.clamp.(float %732, float 0.000000e+00, float 1.000000e+00) %737 = call float @llvm.AMDIL.clamp.(float %733, float 0.000000e+00, float 1.000000e+00) %738 = fmul float %737, %608 %739 = fadd float %738, %727 %740 = fmul float %737, %608 %741 = fadd float %740, %728 %742 = fmul float %737, %608 %743 = fadd float %742, %729 %744 = fmul float %35, %102 %745 = fmul float %35, %103 %746 = fmul float %529, %189 %747 = fmul float %530, %190 %748 = fadd float %747, %746 %749 = fmul float %531, %191 %750 = fadd float %748, %749 %751 = fmul float %282, %189 %752 = fmul float %283, %190 %753 = fadd float %752, %751 %754 = fmul float %284, %191 %755 = fadd float %753, %754 %756 = fmul float %529, %27 %757 = fmul float %530, %28 %758 = fadd float %757, %756 %759 = fmul float %531, %29 %760 = fadd float %758, %759 %761 = fmul float %282, %27 %762 = fmul float %283, %28 %763 = fadd float %762, %761 %764 = fmul float %284, %29 %765 = fadd float %763, %764 %766 = fmul float %529, %30 %767 = fmul float %530, %31 %768 = fadd float %767, %766 %769 = fmul float %531, %32 %770 = fadd float %768, %769 %771 = fmul float %282, %30 %772 = fmul float %283, %31 %773 = fadd float %772, %771 %774 = fmul float %284, %32 %775 = fadd float %773, %774 %776 = bitcast float %744 to i32 %777 = bitcast float %745 to i32 %778 = insertelement <2 x i32> undef, i32 %776, i32 0 %779 = insertelement <2 x i32> %778, i32 %777, i32 1 %780 = bitcast <8 x i32> %68 to <32 x i8> %781 = bitcast <4 x i32> %70 to <16 x i8> %782 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %779, <32 x i8> %780, <16 x i8> %781, i32 2) %783 = extractelement <4 x float> %782, i32 0 %784 = extractelement <4 x float> %782, i32 1 %785 = extractelement <4 x float> %782, i32 2 %786 = extractelement <4 x float> %782, i32 3 %787 = fmul float %783, 2.000000e+00 %788 = fadd float %787, -1.000000e+00 %789 = fmul float %784, 2.000000e+00 %790 = fadd float %789, -1.000000e+00 %791 = fmul float %788, %33 %792 = fmul float %790, %33 %793 = fmul float %791, %791 %794 = fmul float %792, %792 %795 = fadd float %793, %794 %796 = fmul float 1.000000e+00, 1.000000e+00 %797 = fadd float %795, %796 %798 = fmul float 0.000000e+00, 0.000000e+00 %799 = fadd float %797, %798 %800 = call float @llvm.AMDGPU.rsq.clamped.f32(float %799) %801 = fmul float %791, %800 %802 = fmul float %792, %800 %803 = fmul float 1.000000e+00, %800 %804 = fmul float %785, 1.000000e+00 %805 = fadd float %804, 0.000000e+00 %806 = fmul float %786, 1.000000e+00 %807 = fadd float %806, 0.000000e+00 %808 = fmul float %786, 2.000000e+00 %809 = fadd float %808, -1.000000e+00 %810 = fmul float %791, %791 %811 = fmul float %792, %792 %812 = fmul float %792, %791 %813 = fsub float -0.000000e+00, %810 %814 = fmul float %805, %347 %815 = fadd float %814, %813 %816 = fsub float -0.000000e+00, %811 %817 = fmul float %807, %347 %818 = fadd float %817, %816 %819 = fsub float -0.000000e+00, %812 %820 = fmul float %809, %347 %821 = fadd float %820, %819 %822 = fmul float %760, %801 %823 = fmul float %765, %802 %824 = fadd float %823, %822 %825 = fmul float %312, %803 %826 = fadd float %824, %825 %827 = call float @llvm.AMDIL.clamp.(float %826, float 0.000000e+00, float 1.000000e+00) %828 = fmul float %770, %801 %829 = fmul float %775, %802 %830 = fadd float %829, %828 %831 = fmul float %327, %803 %832 = fadd float %830, %831 %833 = call float @llvm.AMDIL.clamp.(float %832, float 0.000000e+00, float 1.000000e+00) %834 = fadd float %750, %770 %835 = fadd float %755, %775 %836 = fadd float %294, %327 %837 = fdiv float 1.000000e+00, %836 %838 = fsub float -0.000000e+00, %791 %839 = fmul float %834, %837 %840 = fadd float %839, %838 %841 = fsub float -0.000000e+00, %792 %842 = fmul float %835, %837 %843 = fadd float %842, %841 %844 = fadd float %401, %815 %845 = fadd float %401, %818 %846 = fmul float %821, %821 %847 = fsub float -0.000000e+00, %846 %848 = fmul float %844, %845 %849 = fadd float %848, %847 %850 = fmul float %840, %840 %851 = fmul float %843, %845 %852 = fsub float -0.000000e+00, %821 %853 = fsub float -0.000000e+00, %821 %854 = fmul float %840, %852 %855 = fmul float %840, %853 %856 = fadd float %854, %855 %857 = fadd float %856, %851 %858 = fmul float %843, %857 %859 = fmul float %850, %845 %860 = fadd float %859, %858 %861 = fmul float %860, 5.000000e-01 %862 = fdiv float 1.000000e+00, %849 %863 = fmul float %861, %862 %864 = fsub float -0.000000e+00, %849 %865 = fcmp oge float %864, 0.000000e+00 %866 = sext i1 %865 to i32 %867 = bitcast i32 %866 to float %868 = bitcast float %867 to i32 %869 = icmp ne i32 %868, 0 %.174 = select i1 %869, float 1.000000e+00, float 0.000000e+00 %870 = fmul float %861, %862 %871 = fadd float %870, -1.600000e+01 %872 = fcmp oge float %871, 0.000000e+00 %873 = sext i1 %872 to i32 %874 = bitcast i32 %873 to float %875 = bitcast float %874 to i32 %876 = icmp ne i32 %875, 0 %temp40.0 = select i1 %876, float 1.000000e+00, float 0.000000e+00 %877 = fadd float %temp40.0, %.174 %878 = fmul float %863, 0xBFF7154CA0000000 %879 = call float @llvm.AMDIL.exp.(float %878) %880 = fcmp uge float %849, 0x3E7AD7F2A0000000 %881 = select i1 %880, float %849, float 0x3E7AD7F2A0000000 %882 = call float @llvm.AMDGPU.rsq.clamped.f32(float %881) %883 = fmul float %882, %879 %884 = fsub float -0.000000e+00, %877 %885 = fcmp oge float %884, 0.000000e+00 %886 = sext i1 %885 to i32 %887 = bitcast i32 %886 to float %888 = bitcast float %887 to i32 %889 = icmp ne i32 %888, 0 %.175 = select i1 %889, float %883, float 0.000000e+00 %890 = fmul float %801, %750 %891 = fmul float %802, %755 %892 = fadd float %891, %890 %893 = fmul float %803, %294 %894 = fadd float %892, %893 %895 = fsub float -0.000000e+00, %894 %896 = fadd float %895, 1.000000e+00 %897 = fmul float %896, %896 %898 = fmul float %897, %897 %899 = fmul float %896, %898 %900 = fmul float %802, %178 %901 = fmul float %803, %178 %902 = fadd float %901, %900 %903 = fmul float %801, %180 %904 = fadd float %902, %903 %905 = fmul float %801, %179 %906 = fmul float %803, %179 %907 = fadd float %906, %905 %908 = fmul float %802, %180 %909 = fadd float %907, %908 %910 = fmul float %178, -1.000000e+00 %911 = fmul float %179, -1.000000e+00 %912 = fmul float %180, 1.000000e+00 %913 = fmul float %801, %910 %914 = fmul float %802, %911 %915 = fadd float %914, %913 %916 = fmul float %803, %912 %917 = fadd float %915, %916 %918 = insertelement <4 x float> undef, float %904, i32 0 %919 = insertelement <4 x float> %918, float %909, i32 1 %920 = insertelement <4 x float> %919, float %917, i32 2 %921 = insertelement <4 x float> %920, float %898, i32 3 %922 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %921) %923 = extractelement <4 x float> %922, i32 0 %924 = extractelement <4 x float> %922, i32 1 %925 = extractelement <4 x float> %922, i32 2 %926 = extractelement <4 x float> %922, i32 3 %927 = call float @fabs(float %925) %928 = fdiv float 1.000000e+00, %927 %929 = fmul float %923, %928 %930 = fadd float %929, 1.500000e+00 %931 = fmul float %924, %928 %932 = fadd float %931, 1.500000e+00 %933 = bitcast float %932 to i32 %934 = bitcast float %930 to i32 %935 = bitcast float %926 to i32 %936 = insertelement <4 x i32> undef, i32 %933, i32 0 %937 = insertelement <4 x i32> %936, i32 %934, i32 1 %938 = insertelement <4 x i32> %937, i32 %935, i32 2 %939 = insertelement <4 x i32> %938, i32 undef, i32 3 %940 = bitcast <8 x i32> %60 to <32 x i8> %941 = bitcast <4 x i32> %62 to <16 x i8> %942 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %939, <32 x i8> %940, <16 x i8> %941, i32 4) %943 = extractelement <4 x float> %942, i32 0 %944 = extractelement <4 x float> %942, i32 1 %945 = extractelement <4 x float> %942, i32 2 %946 = call float @llvm.pow.f32(float %943, float 0x40019999A0000000) %947 = call float @llvm.pow.f32(float %944, float 0x40019999A0000000) %948 = call float @llvm.pow.f32(float %945, float 0x40019999A0000000) %949 = fadd float %827, %946 %950 = fadd float %827, %947 %951 = fadd float %827, %948 %952 = fmul float %269, %949 %953 = fmul float %271, %950 %954 = fmul float %273, %951 %955 = fmul float %.175, %899 %956 = fmul float %.175, %899 %957 = fmul float %.175, %899 %958 = fmul float %.175, %899 %959 = call float @llvm.AMDIL.clamp.(float %955, float 0.000000e+00, float 1.000000e+00) %960 = call float @llvm.AMDIL.clamp.(float %956, float 0.000000e+00, float 1.000000e+00) %961 = call float @llvm.AMDIL.clamp.(float %957, float 0.000000e+00, float 1.000000e+00) %962 = call float @llvm.AMDIL.clamp.(float %958, float 0.000000e+00, float 1.000000e+00) %963 = fmul float %960, %833 %964 = fadd float %963, %952 %965 = fmul float %960, %833 %966 = fadd float %965, %953 %967 = fmul float %960, %833 %968 = fadd float %967, %954 %969 = fmul float %211, %739 %970 = fmul float %211, %741 %971 = fmul float %211, %743 %972 = fmul float %522, %210 %973 = fadd float %972, %969 %974 = fmul float %524, %210 %975 = fadd float %974, %970 %976 = fmul float %526, %210 %977 = fadd float %976, %971 %978 = fmul float %964, %212 %979 = fadd float %978, %973 %980 = fmul float %966, %212 %981 = fadd float %980, %975 %982 = fmul float %968, %212 %983 = fadd float %982, %977 %984 = fmul float %269, 0x3FD3333340000000 %985 = fmul float %271, 0x3FE2E147A0000000 %986 = fadd float %985, %984 %987 = fmul float %273, 0x3FBC28F5C0000000 %988 = fadd float %986, %987 %989 = fsub float -0.000000e+00, %988 %990 = fadd float %989, 1.000000e+00 %991 = fmul float %990, %37 %992 = fmul float %990, %38 %993 = fmul float %990, %39 %994 = call float @llvm.AMDIL.clamp.(float %312, float 0.000000e+00, float 1.000000e+00) %995 = insertelement <4 x float> undef, float %178, i32 0 %996 = insertelement <4 x float> %995, float %179, i32 1 %997 = insertelement <4 x float> %996, float %180, i32 2 %998 = insertelement <4 x float> %997, float %264, i32 3 %999 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %998) %1000 = extractelement <4 x float> %999, i32 0 %1001 = extractelement <4 x float> %999, i32 1 %1002 = extractelement <4 x float> %999, i32 2 %1003 = extractelement <4 x float> %999, i32 3 %1004 = call float @fabs(float %1002) %1005 = fdiv float 1.000000e+00, %1004 %1006 = fmul float %1000, %1005 %1007 = fadd float %1006, 1.500000e+00 %1008 = fmul float %1001, %1005 %1009 = fadd float %1008, 1.500000e+00 %1010 = bitcast float %1009 to i32 %1011 = bitcast float %1007 to i32 %1012 = bitcast float %1003 to i32 %1013 = insertelement <4 x i32> undef, i32 %1010, i32 0 %1014 = insertelement <4 x i32> %1013, i32 %1011, i32 1 %1015 = insertelement <4 x i32> %1014, i32 %1012, i32 2 %1016 = insertelement <4 x i32> %1015, i32 undef, i32 3 %1017 = bitcast <8 x i32> %60 to <32 x i8> %1018 = bitcast <4 x i32> %62 to <16 x i8> %1019 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1016, <32 x i8> %1017, <16 x i8> %1018, i32 4) %1020 = extractelement <4 x float> %1019, i32 0 %1021 = extractelement <4 x float> %1019, i32 1 %1022 = extractelement <4 x float> %1019, i32 2 %1023 = call float @llvm.pow.f32(float %1020, float 0x40019999A0000000) %1024 = call float @llvm.pow.f32(float %1021, float 0x40019999A0000000) %1025 = call float @llvm.pow.f32(float %1022, float 0x40019999A0000000) %1026 = fadd float %1023, %994 %1027 = fadd float %1024, %994 %1028 = fadd float %1025, %994 %1029 = fmul float %991, %1026 %1030 = fadd float %1029, %979 %1031 = fmul float %992, %1027 %1032 = fadd float %1031, %981 %1033 = fmul float %993, %1028 %1034 = fadd float %1033, %983 %1035 = fmul float %108, 5.000000e-01 %1036 = fadd float %1035, 5.000000e-01 %1037 = bitcast float %1036 to i32 %1038 = bitcast float %58 to i32 %1039 = insertelement <2 x i32> undef, i32 %1037, i32 0 %1040 = insertelement <2 x i32> %1039, i32 %1038, i32 1 %1041 = bitcast <8 x i32> %84 to <32 x i8> %1042 = bitcast <4 x i32> %86 to <16 x i8> %1043 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1040, <32 x i8> %1041, <16 x i8> %1042, i32 2) %1044 = extractelement <4 x float> %1043, i32 0 %1045 = fsub float -0.000000e+00, %24 %1046 = fadd float %1045, %102 %1047 = fsub float -0.000000e+00, %25 %1048 = fadd float %1047, %103 %1049 = fsub float -0.000000e+00, %26 %1050 = fadd float %1049, %104 %1051 = fmul float %1046, %1046 %1052 = fmul float %1048, %1048 %1053 = fadd float %1052, %1051 %1054 = fmul float %1050, %1050 %1055 = fadd float %1053, %1054 %1056 = fmul float %1055, %57 %1057 = fmul float %1050, %56 %1058 = fmul float %1057, 0x3FF7154CA0000000 %1059 = call float @llvm.AMDIL.exp.(float %1058) %1060 = fsub float -0.000000e+00, %1059 %1061 = fadd float %1060, 1.000000e+00 %1062 = fmul float %1061, %1056 %1063 = fdiv float 1.000000e+00, %1050 %1064 = fmul float %1063, %1062 %1065 = fmul float %1064, 0x3FF7154CA0000000 %1066 = call float @llvm.AMDIL.exp.(float %1065) %1067 = call float @llvm.AMDIL.clamp.(float %1066, float 0.000000e+00, float 1.000000e+00) %1068 = fsub float -0.000000e+00, %1067 %1069 = fadd float %1068, 1.000000e+00 %1070 = fmul float %1069, %1044 %1071 = fsub float -0.000000e+00, %40 %1072 = fadd float %1071, %104 %1073 = fcmp oge float %1072, 0.000000e+00 %1074 = sext i1 %1073 to i32 %1075 = bitcast i32 %1074 to float %1076 = bitcast float %1075 to i32 %1077 = icmp ne i32 %1076, 0 %temp48.0 = select i1 %1077, float 1.000000e+00, float 0.000000e+00 %1078 = fsub float -0.000000e+00, %104 %1079 = fadd float %40, %1078 %1080 = fsub float -0.000000e+00, %45 %1081 = fadd float %1080, %48 %1082 = fsub float -0.000000e+00, %46 %1083 = fadd float %1082, %49 %1084 = fsub float -0.000000e+00, %47 %1085 = fadd float %1084, %50 %1086 = fmul float %1079, %51 %1087 = fmul float %1079, %51 %1088 = fmul float %1079, %51 %1089 = fmul float %1079, %51 %1090 = call float @llvm.AMDIL.clamp.(float %1086, float 0.000000e+00, float 1.000000e+00) %1091 = call float @llvm.AMDIL.clamp.(float %1087, float 0.000000e+00, float 1.000000e+00) %1092 = call float @llvm.AMDIL.clamp.(float %1088, float 0.000000e+00, float 1.000000e+00) %1093 = call float @llvm.AMDIL.clamp.(float %1089, float 0.000000e+00, float 1.000000e+00) %1094 = fmul float %1091, %1081 %1095 = fadd float %1094, %45 %1096 = fmul float %1091, %1083 %1097 = fadd float %1096, %46 %1098 = fmul float %1091, %1085 %1099 = fadd float %1098, %47 %1100 = fmul float %1030, %1095 %1101 = fmul float %1032, %1097 %1102 = fmul float %1034, %1099 %1103 = fsub float -0.000000e+00, %191 %1104 = fdiv float 1.000000e+00, %1103 %1105 = fmul float %1079, %1104 %1106 = fmul float %1105, %189 %1107 = fmul float %1105, %190 %1108 = fmul float %1105, %191 %1109 = fmul float %1106, %1106 %1110 = fmul float %1107, %1107 %1111 = fadd float %1110, %1109 %1112 = fmul float %1108, %1108 %1113 = fadd float %1111, %1112 %1114 = fcmp uge float %1113, 0x3E7AD7F2A0000000 %1115 = select i1 %1114, float %1113, float 0x3E7AD7F2A0000000 %1116 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1115) %1117 = fmul float %1116, %1115 %1118 = fsub float -0.000000e+00, %1115 %1119 = call float @llvm.AMDGPU.cndlt(float %1118, float %1117, float 0.000000e+00) %1120 = fsub float -0.000000e+00, %1119 %1121 = fmul float %1120, %52 %1122 = fmul float %1121, 0x3FF7154CA0000000 %1123 = call float @llvm.AMDIL.exp.(float %1122) %1124 = fmul float %191, %191 %1125 = fmul float %1124, %1124 %1126 = fsub float -0.000000e+00, %1125 %1127 = fmul float %191, %1126 %1128 = fadd float %1127, 1.000000e+00 %1129 = fsub float -0.000000e+00, %1128 %1130 = fadd float %1129, 1.000000e+00 %1131 = fmul float %1123, %1130 %1132 = fmul float %1131, %1100 %1133 = fmul float %1131, %1101 %1134 = fmul float %1131, %1102 %1135 = fsub float -0.000000e+00, %1069 %1136 = fmul float %1044, %1135 %1137 = fadd float %1136, 1.000000e+00 %1138 = fmul float %1137, %1132 %1139 = fmul float %1137, %1133 %1140 = fmul float %1137, %1134 %1141 = fcmp oge float %1072, 0.000000e+00 %1142 = sext i1 %1141 to i32 %1143 = bitcast i32 %1142 to float %1144 = bitcast float %1143 to i32 %1145 = icmp ne i32 %1144, 0 %.176 = select i1 %1145, float %1030, float %1138 %1146 = fcmp oge float %1072, 0.000000e+00 %1147 = sext i1 %1146 to i32 %1148 = bitcast i32 %1147 to float %1149 = bitcast float %1148 to i32 %1150 = icmp ne i32 %1149, 0 %temp32.0 = select i1 %1150, float %1032, float %1139 %1151 = fcmp oge float %1072, 0.000000e+00 %1152 = sext i1 %1151 to i32 %1153 = bitcast i32 %1152 to float %1154 = bitcast float %1153 to i32 %1155 = icmp ne i32 %1154, 0 %.177 = select i1 %1155, float %1034, float %1140 %1156 = fcmp oge float %1072, 0.000000e+00 %1157 = sext i1 %1156 to i32 %1158 = bitcast i32 %1157 to float %1159 = bitcast float %1158 to i32 %1160 = icmp ne i32 %1159, 0 %temp16.2 = select i1 %1160, float %1070, float 0.000000e+00 %1161 = call float @fabs(float %.176) %1162 = call float @llvm.log2.f32(float %1161) %1163 = call float @fabs(float %temp32.0) %1164 = call float @llvm.log2.f32(float %1163) %1165 = call float @fabs(float %.177) %1166 = call float @llvm.log2.f32(float %1165) %1167 = fmul float %1162, 0x3FDD1743E0000000 %1168 = fmul float %1164, 0x3FDD1743E0000000 %1169 = fmul float %1166, 0x3FDD1743E0000000 %1170 = call float @llvm.AMDIL.exp.(float %1167) %1171 = call float @llvm.AMDIL.exp.(float %1168) %1172 = call float @llvm.AMDIL.exp.(float %1169) %1173 = bitcast float %111 to i32 %1174 = bitcast float %112 to i32 %1175 = insertelement <2 x i32> undef, i32 %1173, i32 0 %1176 = insertelement <2 x i32> %1175, i32 %1174, i32 1 %1177 = bitcast <8 x i32> %92 to <32 x i8> %1178 = bitcast <4 x i32> %94 to <16 x i8> %1179 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1176, <32 x i8> %1177, <16 x i8> %1178, i32 2) %1180 = extractelement <4 x float> %1179, i32 0 %1181 = extractelement <4 x float> %1179, i32 1 %1182 = extractelement <4 x float> %1179, i32 2 %1183 = call float @llvm.AMDGPU.lrp(float %124, float %1170, float %1180) %1184 = call float @llvm.AMDGPU.lrp(float %124, float %1171, float %1181) %1185 = call float @llvm.AMDGPU.lrp(float %124, float %1172, float %1182) %1186 = bitcast float %1183 to i32 %1187 = bitcast float %1184 to i32 %1188 = bitcast float %1185 to i32 %1189 = insertelement <4 x i32> undef, i32 %1186, i32 0 %1190 = insertelement <4 x i32> %1189, i32 %1187, i32 1 %1191 = insertelement <4 x i32> %1190, i32 %1188, i32 2 %1192 = insertelement <4 x i32> %1191, i32 undef, i32 3 %1193 = bitcast <8 x i32> %96 to <32 x i8> %1194 = bitcast <4 x i32> %98 to <16 x i8> %1195 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1192, <32 x i8> %1193, <16 x i8> %1194, i32 3) %1196 = extractelement <4 x float> %1195, i32 0 %1197 = extractelement <4 x float> %1195, i32 1 %1198 = extractelement <4 x float> %1195, i32 2 %1199 = fsub float -0.000000e+00, %124 %1200 = fmul float %125, %1199 %1201 = fadd float %1200, %124 %1202 = call float @llvm.AMDGPU.lrp(float %1201, float %1196, float %1183) %1203 = call float @llvm.AMDGPU.lrp(float %1201, float %1197, float %1184) %1204 = call float @llvm.AMDGPU.lrp(float %1201, float %1198, float %1185) %1205 = fsub float -0.000000e+00, %1202 %1206 = fadd float %1205, %53 %1207 = fsub float -0.000000e+00, %1203 %1208 = fadd float %1207, %54 %1209 = fsub float -0.000000e+00, %1204 %1210 = fadd float %1209, %55 %1211 = fmul float %temp16.2, %1206 %1212 = fadd float %1211, %1202 %1213 = fmul float %temp16.2, %1208 %1214 = fadd float %1213, %1203 %1215 = fmul float %temp16.2, %1210 %1216 = fadd float %1215, %1204 %1217 = call i32 @llvm.SI.packf16(float %1212, float %1214) %1218 = bitcast i32 %1217 to float %1219 = call i32 @llvm.SI.packf16(float %1216, float %temp48.0) %1220 = bitcast i32 %1219 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1218, float %1220, float %1218, float %1220) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 4, [m0] ; C8081000 V_INTERP_P2_F32 v2, [v2], v1, 0, 4, [m0] ; C8091001 V_INTERP_P1_F32 v4, v0, 1, 4, [m0] ; C8101100 V_INTERP_P2_F32 v4, [v4], v1, 1, 4, [m0] ; C8111101 V_SUB_F32_e32 v3, 1.000000e+00, v4 ; 080608F2 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x1c ; C086051C S_LOAD_DWORDX8 s[16:23], s[6:7], 0x38 ; C0C80738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640202 V_MOV_B32_e32 v6, 0x80000000 ; 7E0C02FF 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_OR_B32_e32 v7, v2, v6 ; 380E0D02 V_CMP_GE_F32_e64 s[0:1], v7, 0.000000e+00, 0, 0 ; D00C0000 00010107 V_CNDMASK_B32_e64 v6, v6, -1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000006 0001E706 V_CMP_LT_F32_e64 s[0:1], v6, 0.000000e+00, 0, 0 ; D0020000 00010106 V_CNDMASK_B32_e64 v6, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000006 00018280 V_OR_B32_e32 v6, v6, v6 ; 380C0D06 V_CMP_NE_I32_e64 s[0:1], v6, 0, 0, 0 ; D10A0000 00010106 V_INTERP_P1_F32 v7, v0, 1, 5, [m0] ; C81C1500 V_INTERP_P2_F32 v7, [v7], v1, 1, 5, [m0] ; C81D1501 V_INTERP_P1_F32 v6, v0, 0, 5, [m0] ; C8181400 V_INTERP_P2_F32 v6, [v6], v1, 0, 5, [m0] ; C8191401 V_INTERP_P1_F32 v9, v0, 1, 3, [m0] ; C8240D00 V_INTERP_P2_F32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 V_INTERP_P1_F32 v46, v0, 2, 2, [m0] ; C8B80A00 V_INTERP_P2_F32 v46, [v46], v1, 2, 2, [m0] ; C8B90A01 V_INTERP_P1_F32 v47, v0, 1, 2, [m0] ; C8BC0900 V_INTERP_P2_F32 v47, [v47], v1, 1, 2, [m0] ; C8BD0901 V_INTERP_P1_F32 v59, v0, 0, 2, [m0] ; C8EC0800 V_INTERP_P2_F32 v59, [v59], v1, 0, 2, [m0] ; C8ED0801 V_INTERP_P1_F32 v12, v0, 2, 1, [m0] ; C8300600 V_INTERP_P2_F32 v12, [v12], v1, 2, 1, [m0] ; C8310601 V_INTERP_P1_F32 v14, v0, 1, 1, [m0] ; C8380500 V_INTERP_P2_F32 v14, [v14], v1, 1, 1, [m0] ; C8390501 V_INTERP_P1_F32 v17, v0, 0, 1, [m0] ; C8440400 V_INTERP_P2_F32 v17, [v17], v1, 0, 1, [m0] ; C8450401 V_INTERP_P1_F32 v25, v0, 2, 0, [m0] ; C8640200 V_INTERP_P2_F32 v25, [v25], v1, 2, 0, [m0] ; C8650201 V_INTERP_P1_F32 v26, v0, 1, 0, [m0] ; C8680100 V_INTERP_P2_F32 v26, [v26], v1, 1, 0, [m0] ; C8690101 V_INTERP_P1_F32 v27, v0, 0, 0, [m0] ; C86C0000 V_INTERP_P2_F32 v27, [v27], v1, 0, 0, [m0] ; C86D0001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x43 ; C2010943 S_BUFFER_LOAD_DWORD s3, s[8:11], 0x41 ; C2018941 S_BUFFER_LOAD_DWORD s12, s[8:11], 0x40 ; C2060940 S_BUFFER_LOAD_DWORD s13, s[8:11], 0x3e ; C206893E S_BUFFER_LOAD_DWORD s14, s[8:11], 0x3d ; C207093D S_BUFFER_LOAD_DWORD s15, s[8:11], 0x3c ; C207893C S_BUFFER_LOAD_DWORD s16, s[8:11], 0x38 ; C2080938 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x34 ; C2088934 S_BUFFER_LOAD_DWORD s18, s[8:11], 0x32 ; C2090932 S_BUFFER_LOAD_DWORD s19, s[8:11], 0x31 ; C2098931 S_BUFFER_LOAD_DWORD s20, s[8:11], 0x30 ; C20A0930 S_BUFFER_LOAD_DWORD s21, s[8:11], 0x2e ; C20A892E S_BUFFER_LOAD_DWORD s22, s[8:11], 0x2d ; C20B092D S_BUFFER_LOAD_DWORD s23, s[8:11], 0x2c ; C20B892C S_BUFFER_LOAD_DWORD s24, s[8:11], 0x29 ; C20C0929 S_BUFFER_LOAD_DWORD s25, s[8:11], 0x28 ; C20C8928 S_BUFFER_LOAD_DWORD s26, s[8:11], 0x25 ; C20D0925 S_BUFFER_LOAD_DWORD s27, s[8:11], 0x24 ; C20D8924 S_BUFFER_LOAD_DWORD s28, s[8:11], 0x20 ; C20E0920 S_BUFFER_LOAD_DWORD s29, s[8:11], 0x1e ; C20E891E S_BUFFER_LOAD_DWORD s30, s[8:11], 0x1d ; C20F091D S_BUFFER_LOAD_DWORD s31, s[8:11], 0x1c ; C20F891C S_BUFFER_LOAD_DWORD s32, s[8:11], 0x18 ; C2100918 S_BUFFER_LOAD_DWORD s33, s[8:11], 0x14 ; C2108914 S_BUFFER_LOAD_DWORD s34, s[8:11], 0x10 ; C2110910 S_BUFFER_LOAD_DWORD s35, s[8:11], 0xc ; C211890C S_BUFFER_LOAD_DWORD s36, s[8:11], 0xa ; C212090A S_BUFFER_LOAD_DWORD s37, s[8:11], 0x9 ; C2128909 S_BUFFER_LOAD_DWORD s38, s[8:11], 0x8 ; C2130908 S_BUFFER_LOAD_DWORD s39, s[8:11], 0x6 ; C2138906 S_BUFFER_LOAD_DWORD s40, s[8:11], 0x5 ; C2140905 S_BUFFER_LOAD_DWORD s41, s[8:11], 0x4 ; C2148904 S_BUFFER_LOAD_DWORD s42, s[8:11], 0x2 ; C2150902 S_BUFFER_LOAD_DWORD s43, s[8:11], 0x1 ; C2158901 S_BUFFER_LOAD_DWORD s8, s[8:11], 0x0 ; C2040900 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v20, s2 ; 7E280202 V_MOV_B32_e32 v23, s3 ; 7E2E0203 V_MOV_B32_e32 v22, s12 ; 7E2C020C V_MOV_B32_e32 v0, s13 ; 7E00020D V_MOV_B32_e32 v8, s14 ; 7E10020E V_MOV_B32_e32 v1, s15 ; 7E02020F V_MOV_B32_e32 v24, s16 ; 7E300210 V_MOV_B32_e32 v32, s17 ; 7E400211 V_MOV_B32_e32 v35, s18 ; 7E460212 V_MOV_B32_e32 v15, s19 ; 7E1E0213 V_MOV_B32_e32 v10, s20 ; 7E140214 V_MOV_B32_e32 v34, s21 ; 7E440215 V_MOV_B32_e32 v16, s22 ; 7E200216 V_MOV_B32_e32 v11, s23 ; 7E160217 V_MOV_B32_e32 v33, s24 ; 7E420218 V_MOV_B32_e32 v60, s25 ; 7E780219 V_MOV_B32_e32 v49, s26 ; 7E62021A V_MOV_B32_e32 v38, s27 ; 7E4C021B V_MOV_B32_e32 v21, s28 ; 7E2A021C V_MOV_B32_e32 v37, s29 ; 7E4A021D V_MOV_B32_e32 v18, s30 ; 7E24021E V_MOV_B32_e32 v13, s31 ; 7E1A021F V_MOV_B32_e32 v62, s32 ; 7E7C0220 V_MOV_B32_e32 v57, s33 ; 7E720221 V_MOV_B32_e32 v63, s34 ; 7E7E0222 V_MOV_B32_e32 v56, s35 ; 7E700223 V_MOV_B32_e32 v43, s36 ; 7E560224 V_MOV_B32_e32 v44, s37 ; 7E580225 V_MOV_B32_e32 v61, s38 ; 7E7A0226 V_MOV_B32_e32 v58, s39 ; 7E740227 V_MOV_B32_e32 v55, s40 ; 7E6E0228 V_MOV_B32_e32 v31, s41 ; 7E3E0229 V_MOV_B32_e32 v28, s42 ; 7E38022A V_MOV_B32_e32 v30, s43 ; 7E3C022B V_MOV_B32_e32 v29, s8 ; 7E3A0208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MUL_F32_e32 v65, v33, v12 ; 10821921 V_MUL_F32_e32 v64, v60, v14 ; 10801D3C S_LOAD_DWORDX4 s[20:23], s[4:5], 0x10 ; C08A0510 S_LOAD_DWORDX8 s[24:31], s[6:7], 0x20 ; C0CC0720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[66:69], 15, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[24:31], s[20:23] ; F0800F00 00A64240 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v33, v66, v66 ; 06428542 V_ADD_F32_e32 v33, -1.000000e+00, v33 ; 064242F3 V_MUL_F32_e32 v70, v33, v56 ; 108C7121 V_ADD_F32_e32 v33, v67, v67 ; 06428743 V_ADD_F32_e32 v33, -1.000000e+00, v33 ; 064242F3 V_MUL_F32_e32 v71, v33, v56 ; 108E7121 V_MUL_F32_e32 v72, v71, v71 ; 10908F47 V_MAD_F32 v33, v70, v70, v72, 0, 0 ; D2820021 05228D46 V_ADD_F32_e32 v33, 1.000000e+00, v33 ; 064242F2 V_RSQ_CLAMP_F32_e32 v73, v33 ; 7E925921 V_MUL_F32_e32 v74, v71, v73 ; 10949347 V_MUL_F32_e32 v75, v70, v73 ; 10969346 V_MUL_F32_e32 v33, v26, v26 ; 1042351A V_MAD_F32 v33, v27, v27, v33, 0, 0 ; D2820021 0486371B V_MAD_F32 v33, v25, v25, v33, 0, 0 ; D2820021 04863319 V_RSQ_CLAMP_F32_e32 v33, v33 ; 7E425921 V_MUL_F32_e32 v39, v27, v33 ; 104E431B V_MUL_F32_e32 v27, v75, v39 ; 10364F4B V_MUL_F32_e32 v41, v25, v33 ; 10524319 V_MAD_F32 v25, v74, v41, v27, 0, 0 ; D2820019 046E534A V_MAD_F32 v78, v73, v41, v25, 0, 0 ; D282004E 04665349 V_MUL_F32_e32 v25, v74, v39 ; 10324F4A V_MUL_F32_e32 v40, v26, v33 ; 1050431A V_MAD_F32 v25, v75, v40, v25, 0, 0 ; D2820019 0466514B V_MAD_F32 v77, v73, v40, v25, 0, 0 ; D282004D 04665149 V_MUL_F32_e32 v25, v74, v40 ; 1032514A V_MUL_F32_e64 v26, v75, -v41, 0, 0 ; D210001A 4002534B V_SUB_F32_e32 v25, v26, v25 ; 0832331A V_MAD_F32 v76, v73, v39, v25, 0, 0 ; D282004C 04664F49 V_MOV_B32_e32 v79, 0.000000e+00 ; 7E9E0280 V_CUBESC_F32 v81, v76, v77, v78, 0, 0 ; D28A0051 053A9B4C V_CUBETC_F32 v80, v76, v77, v78, 0, 0 ; D28C0050 053A9B4C V_CUBEMA_F32 v82, v76, v77, v78, 0, 0 ; D28E0052 053A9B4C V_CUBEID_F32 v83, v76, v77, v78, 0, 0 ; D2880053 053A9B4C V_MOV_B32_e32 v25, 0x7fffffff ; 7E3202FF 7FFFFFFF V_AND_B32_e32 v25, v82, v25 ; 36323352 V_RCP_F32_e32 v25, v25 ; 7E325519 V_MOV_B32_e32 v45, 1.500000e+00 ; 7E5A02FF 3FC00000 V_MAD_F32 v82, v80, v25, v45, 0, 0 ; D2820052 04B63350 V_MAD_F32 v81, v81, v25, v45, 0, 0 ; D2820051 04B63351 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[81:84], s[12:19], s[8:11] ; F0800700 00431951 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v33, v27 ; 7E424F1B V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MUL_F32_e32 v36, v40, v55 ; 10486F28 V_MUL_F32_e32 v80, v41, v31 ; 10A03F29 V_SUB_F32_e32 v36, v80, v36 ; 08484950 V_MUL_F32_e32 v81, v39, v58 ; 10A27527 V_SUB_F32_e32 v36, v36, v81 ; 0848A324 V_MUL_F32_e32 v36, v36, v75 ; 10489724 V_MUL_F32_e32 v48, v40, v31 ; 10603F28 V_MUL_F32_e32 v50, v39, v55 ; 10646F27 V_SUB_F32_e32 v48, v50, v48 ; 08606132 V_MAD_F32 v82, v41, v58, v48, 0, 0 ; D2820052 04C27529 V_MUL_F32_e32 v48, v82, v74 ; 10609552 V_SUB_F32_e32 v36, v48, v36 ; 08484930 V_MUL_F32_e32 v83, v39, v31 ; 10A63F27 V_MAD_F32 v31, v40, v55, v83, 0, 0 ; D282001F 054E6F28 V_MAD_F32 v48, v41, v58, v31, 0, 0 ; D2820030 047E7529 V_MAD_F32 v31, v48, v73, v36, 0, 0 ; D282001F 04929330 V_ADD_F32_e64 v31, v31, 0, 1, 0 ; D206081F 0001011F V_ADD_F32_e32 v84, v31, v33 ; 06A8431F V_MOV_B32_e32 v33, 0x7fffffff ; 7E4202FF 7FFFFFFF V_AND_B32_e32 v36, v40, v33 ; 36484328 V_MUL_F32_e64 v36, v36, |v40|, 0, 0 ; D2100224 00025124 V_AND_B32_e32 v33, v39, v33 ; 36424327 V_MUL_F32_e64 v33, v33, |v39|, 0, 0 ; D2100221 00024F21 V_MUL_F32_e32 v33, v33, v33 ; 10424321 V_MAD_F32 v50, v36, v36, v33, 0, 0 ; D2820032 04864924 V_MOV_B32_e32 v51, 0x7fffffff ; 7E6602FF 7FFFFFFF V_AND_B32_e32 v51, v41, v51 ; 36666729 V_MUL_F32_e64 v85, v51, |v41|, 0, 0 ; D2100255 00025333 V_MAD_F32 v50, v85, v85, v50, 0, 0 ; D2820032 04CAAB55 V_RCP_F32_e32 v86, v50 ; 7EAC5532 V_MUL_F32_e32 v33, v86, v33 ; 10424356 V_MUL_F32_e32 v36, v36, v36 ; 10484924 V_MUL_F32_e32 v36, v86, v36 ; 10484956 V_MUL_F32_e32 v53, v49, v12 ; 106A1931 V_MUL_F32_e32 v52, v38, v17 ; 10682326 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[32:39], s[6:7], 0x18 ; C0D00718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[49:51], 7, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[32:39], s[0:3] ; F0800700 00083134 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v54, v51 ; 7E6C4F33 V_MUL_LEGACY_F32_e32 v54, 2.200000e+00, v54 ; 0E6C6CFF 400CCCCD V_EXP_F32_e32 v54, v54 ; 7E6C4B36 V_MUL_F32_e32 v87, v36, v54 ; 10AE6D24 V_MUL_F32_e32 v52, v38, v14 ; 10681D26 IMAGE_SAMPLE v[52:54], 7, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[32:39], s[0:3] ; F0800700 00083434 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v38, v54 ; 7E4C4F36 V_MUL_LEGACY_F32_e32 v38, 2.200000e+00, v38 ; 0E4C4CFF 400CCCCD V_EXP_F32_e32 v38, v38 ; 7E4C4B26 V_MAD_F32 v87, v38, v33, v87, 0, 0 ; D2820057 055E4326 V_MUL_F32_e32 v38, v85, v85 ; 104CAB55 V_MUL_F32_e32 v38, v86, v38 ; 104C4D56 V_MUL_F32_e32 v86, v63, v14 ; 10AC1D3F V_MUL_F32_e32 v85, v63, v17 ; 10AA233F S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[32:39], s[6:7], 0x8 ; C0D00708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[88:91], 15, 0, 0, 0, 0, 0, 0, 0, v[85:86], s[32:39], s[0:3] ; F0800F00 00085855 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v63, v90 ; 7E7E4F5A V_MUL_LEGACY_F32_e32 v63, 2.200000e+00, v63 ; 0E7E7EFF 400CCCCD V_EXP_F32_e32 v63, v63 ; 7E7E4B3F V_MAD_F32 v63, v63, v38, v87, 0, 0 ; D282003F 055E4D3F V_MUL_F32_e32 v84, v63, v84 ; 10A8A93F V_MUL_F32_e32 v85, v56, v56 ; 10AA7138 V_MUL_F32_e32 v86, v69, v85 ; 10ACAB45 V_SUB_F32_e32 v72, v86, v72 ; 08909156 V_RCP_F32_e32 v62, v62 ; 7E7C553E V_ADD_F32_e32 v72, v62, v72 ; 0690913E V_MUL_F32_e32 v66, v68, v85 ; 1084AB44 V_MUL_F32_e32 v67, v70, v70 ; 10868D46 V_SUB_F32_e32 v66, v66, v67 ; 08848742 V_ADD_F32_e32 v66, v62, v66 ; 0684853E V_MUL_F32_e32 v66, v66, v72 ; 10849142 V_MUL_F32_e32 v67, v71, v70 ; 10868D47 S_LOAD_DWORDX4 s[32:35], s[4:5], 0x14 ; C0900514 S_LOAD_DWORDX8 s[36:43], s[6:7], 0x28 ; C0D20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v68, 8, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[36:43], s[32:35] ; F0800800 01094440 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v68, v68, v68 ; 06888944 V_ADD_F32_e32 v68, -1.000000e+00, v68 ; 068888F3 V_MUL_F32_e32 v68, v68, v85 ; 1088AB44 V_SUB_F32_e32 v67, v68, v67 ; 08868744 V_MUL_F32_e32 v68, v67, v67 ; 10888743 V_SUB_F32_e32 v66, v66, v68 ; 08848942 V_MOV_B32_e32 v68, 1.000000e-07 ; 7E8802FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v66, v68 ; 7C0C8942 V_CMP_U_F32_e64 s[0:1], v66, v66, 0, 0 ; D0100000 00028542 V_CNDMASK_B32_e64 v69, 0, -1, vcc, 0, 0, 0, 0 ; D2000045 01A98280 V_CNDMASK_B32_e64 v86, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000056 00018280 V_OR_B32_e32 v69, v69, v86 ; 388AAD45 V_MOV_B32_e32 v86, 0x33d6bf95 ; 7EAC02FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v69, 0, 0, 0 ; D10A0000 00010145 V_CNDMASK_B32_e64 v69, v86, v66, s[0:1], 0, 0, 0, 0 ; D2000045 00028556 V_RSQ_CLAMP_F32_e32 v69, v69 ; 7E8A5945 V_MUL_F32_e32 v87, v47, v47 ; 10AE5F2F V_MAD_F32 v87, v59, v59, v87, 0, 0 ; D2820057 055E773B V_MAD_F32 v87, v46, v46, v87, 0, 0 ; D2820057 055E5D2E V_RSQ_CLAMP_F32_e32 v87, v87 ; 7EAE5957 V_MUL_F32_e32 v59, v59, v87 ; 1076AF3B V_MUL_F32_e32 v92, v41, v59 ; 10B87729 V_MUL_F32_e32 v93, v47, v87 ; 10BAAF2F V_MUL_F32_e32 v47, v40, v93 ; 105EBB28 V_SUB_F32_e32 v47, v47, v92 ; 085EB92F V_MUL_F32_e32 v87, v46, v87 ; 10AEAF2E V_MAD_F32 v46, v39, v87, v47, 0, 0 ; D282002E 04BEAF27 V_MUL_F32_e32 v94, v41, v61 ; 10BC7B29 V_MUL_F32_e32 v47, v40, v44 ; 105E5928 V_SUB_F32_e32 v47, v47, v94 ; 085EBD2F V_MAD_F32 v47, v39, v43, v47, 0, 0 ; D282002F 04BE5727 V_ADD_F32_e32 v95, v46, v47 ; 06BE5F2E V_MUL_F32_e32 v96, v39, v61 ; 10C07B27 V_MAD_F32 v97, v40, v44, v96, 0, 0 ; D2820061 05825928 V_MAD_F32 v97, v41, v43, v97, 0, 0 ; D2820061 05865729 V_MUL_F32_e32 v98, v39, v59 ; 10C47727 V_MAD_F32 v99, v40, v93, v98, 0, 0 ; D2820063 058ABB28 V_MAD_F32 v99, v41, v87, v99, 0, 0 ; D2820063 058EAF29 V_ADD_F32_e32 v100, v99, v97 ; 06C8C363 V_RCP_F32_e32 v100, v100 ; 7EC85564 V_MUL_F32_e32 v95, v95, v100 ; 10BEC95F V_SUB_F32_e32 v70, v95, v70 ; 088C8D5F V_MUL_F32_e64 v95, v70, -v67, 0, 0 ; D210005F 40028746 V_MOV_B32_e32 v101, 0x80000000 ; 7ECA02FF 80000000 V_XOR_B32_e32 v67, v67, v101 ; 3A86CB43 V_MAD_F32 v67, v70, v67, v95, 0, 0 ; D2820043 057E8746 V_MUL_F32_e32 v95, v40, v59 ; 10BE7728 V_MUL_F32_e32 v101, v39, v93 ; 10CABB27 V_SUB_F32_e32 v95, v101, v95 ; 08BEBF65 V_MAD_F32 v95, v41, v87, v95, 0, 0 ; D282005F 057EAF29 V_MUL_F32_e32 v61, v40, v61 ; 107A7B28 V_MUL_F32_e32 v101, v39, v44 ; 10CA5927 V_SUB_F32_e32 v61, v101, v61 ; 087A7B65 V_MAD_F32 v61, v41, v43, v61, 0, 0 ; D282003D 04F65729 V_ADD_F32_e32 v101, v95, v61 ; 06CA7B5F V_MUL_F32_e32 v101, v101, v100 ; 10CAC965 V_SUB_F32_e32 v71, v101, v71 ; 088E8F65 V_MAD_F32 v67, v71, v72, v67, 0, 0 ; D2820043 050E9147 V_MUL_F32_e32 v67, v71, v67 ; 10868747 V_MUL_F32_e32 v70, v70, v70 ; 108C8D46 V_MAD_F32 v67, v70, v72, v67, 0, 0 ; D2820043 050E9146 V_MUL_F32_e32 v67, 5.000000e-01, v67 ; 108686F0 V_RCP_F32_e32 v70, v66 ; 7E8C5542 V_MUL_F32_e32 v71, v67, v70 ; 108E8D43 V_MUL_F32_e32 v71, -1.442700e+00, v71 ; 108E8EFF BFB8AA65 V_EXP_F32_e32 v71, v71 ; 7E8E4B47 V_MUL_F32_e32 v69, v69, v71 ; 108A8F45 V_MOV_B32_e32 v71, -1.600000e+01 ; 7E8E02FF C1800000 V_MAD_F32 v67, v67, v70, v71, 0, 0 ; D2820043 051E8D43 V_CMP_GE_F32_e64 s[0:1], v67, 0.000000e+00, 0, 0 ; D00C0000 00010143 V_CNDMASK_B32_e64 v67, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000043 0001E480 V_MOV_B32_e32 v70, 0x80000000 ; 7E8C02FF 80000000 V_XOR_B32_e32 v66, v66, v70 ; 3A848D42 V_CMP_GE_F32_e64 s[0:1], v66, 0.000000e+00, 0, 0 ; D00C0000 00010142 V_CNDMASK_B32_e64 v66, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000042 0001E480 V_ADD_F32_e32 v66, v67, v66 ; 06848543 V_XOR_B32_e32 v66, v66, v70 ; 3A848D42 V_CMP_GE_F32_e64 s[0:1], v66, 0.000000e+00, 0, 0 ; D00C0000 00010142 V_CNDMASK_B32_e64 v66, 0, v69, s[0:1], 0, 0, 0, 0 ; D2000042 00028A80 V_MUL_F32_e32 v46, v75, v46 ; 105C5D4B V_MAD_F32 v46, v73, v99, v46, 0, 0 ; D282002E 04BAC749 V_MAD_F32 v46, v74, v95, v46, 0, 0 ; D282002E 04BABF4A V_SUB_F32_e32 v46, 1.000000e+00, v46 ; 085C5CF2 V_MUL_F32_e32 v67, v46, v46 ; 10865D2E V_MUL_F32_e32 v67, v67, v67 ; 10868743 V_MUL_F32_e32 v46, v46, v67 ; 105C872E V_MUL_F32_e32 v46, v66, v46 ; 105C5D42 V_ADD_F32_e64 v46, v46, 0, 1, 0 ; D206082E 0001012E V_MUL_F32_e32 v47, v47, v75 ; 105E972F V_MAD_F32 v47, v61, v74, v47, 0, 0 ; D282002F 04BE953D V_MAD_F32 v47, v97, v73, v47, 0, 0 ; D282002F 04BE9361 V_ADD_F32_e64 v47, v47, 0, 1, 0 ; D206082F 0001012F V_MAD_F32 v66, v46, v47, v84, 0, 0 ; D2820042 05525F2E V_MUL_F32_e32 v64, v60, v17 ; 1080233C IMAGE_SAMPLE v[72:75], 15, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[24:31], s[20:23] ; F0800F00 00A64840 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v60, v72, v72 ; 06789148 V_ADD_F32_e32 v60, -1.000000e+00, v60 ; 067878F3 V_MUL_F32_e32 v60, v60, v56 ; 1078713C V_ADD_F32_e32 v67, v73, v73 ; 06869349 V_ADD_F32_e32 v67, -1.000000e+00, v67 ; 068686F3 V_MUL_F32_e32 v67, v67, v56 ; 10867143 V_MUL_F32_e32 v69, v67, v67 ; 108A8743 V_MAD_F32 v70, v60, v60, v69, 0, 0 ; D2820046 0516793C V_ADD_F32_e32 v70, 1.000000e+00, v70 ; 068C8CF2 V_RSQ_CLAMP_F32_e32 v70, v70 ; 7E8C5946 V_MUL_F32_e32 v84, v67, v70 ; 10A88D43 V_MUL_F32_e32 v102, v60, v70 ; 10CC8D3C V_MUL_F32_e32 v103, v102, v40 ; 10CE5166 V_MAD_F32 v103, v84, v41, v103, 0, 0 ; D2820067 059E5354 V_MAD_F32 v78, v70, v41, v103, 0, 0 ; D282004E 059E5346 V_MUL_F32_e32 v103, v84, v39 ; 10CE4F54 V_MAD_F32 v103, v70, v40, v103, 0, 0 ; D2820067 059E5146 V_MAD_F32 v77, v102, v41, v103, 0, 0 ; D282004D 059E5366 V_MUL_F32_e32 v103, v84, v40 ; 10CE5154 V_MUL_F32_e64 v104, v102, -v39, 0, 0 ; D2100068 40024F66 V_SUB_F32_e32 v103, v104, v103 ; 08CECF68 V_MAD_F32 v76, v70, v39, v103, 0, 0 ; D282004C 059E4F46 V_CUBESC_F32 v104, v76, v77, v78, 0, 0 ; D28A0068 053A9B4C V_CUBETC_F32 v103, v76, v77, v78, 0, 0 ; D28C0067 053A9B4C V_CUBEMA_F32 v105, v76, v77, v78, 0, 0 ; D28E0069 053A9B4C V_CUBEID_F32 v106, v76, v77, v78, 0, 0 ; D288006A 053A9B4C V_MOV_B32_e32 v76, 0x7fffffff ; 7E9802FF 7FFFFFFF V_AND_B32_e32 v76, v105, v76 ; 36989969 V_RCP_F32_e32 v76, v76 ; 7E98554C V_MAD_F32 v105, v103, v76, v45, 0, 0 ; D2820069 04B69967 V_MAD_F32 v104, v104, v76, v45, 0, 0 ; D2820068 04B69968 IMAGE_SAMPLE v[76:78], 7, 0, 0, 0, 0, 0, 0, 0, v[104:107], s[12:19], s[8:11] ; F0800700 00434C68 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v79, v78 ; 7E9E4F4E V_MUL_LEGACY_F32_e32 v79, 2.200000e+00, v79 ; 0E9E9EFF 400CCCCD V_EXP_F32_e32 v79, v79 ; 7E9E4B4F V_MUL_F32_e32 v103, v41, v55 ; 10CE6F29 V_SUB_F32_e32 v103, v83, v103 ; 08CECF53 V_MUL_F32_e32 v58, v40, v58 ; 10747528 V_SUB_F32_e32 v103, v103, v58 ; 08CE7567 V_MUL_F32_e32 v103, v103, v102 ; 10CECD67 V_MUL_F32_e32 v82, v82, v84 ; 10A4A952 V_SUB_F32_e32 v82, v82, v103 ; 08A4CF52 V_MAD_F32 v82, v48, v70, v82, 0, 0 ; D2820052 054A8D30 V_ADD_F32_e64 v82, v82, 0, 1, 0 ; D2060852 00010152 V_ADD_F32_e32 v79, v82, v79 ; 069E9F52 V_MUL_F32_e32 v79, v63, v79 ; 109E9F3F V_MUL_F32_e32 v103, v75, v85 ; 10CEAB4B V_SUB_F32_e32 v69, v103, v69 ; 088A8B67 V_ADD_F32_e32 v69, v62, v69 ; 068A8B3E V_MUL_F32_e32 v72, v74, v85 ; 1090AB4A V_MUL_F32_e32 v73, v60, v60 ; 1092793C V_SUB_F32_e32 v72, v72, v73 ; 08909348 V_ADD_F32_e32 v72, v62, v72 ; 0690913E V_MUL_F32_e32 v72, v72, v69 ; 10908B48 V_MUL_F32_e32 v73, v67, v60 ; 10927943 IMAGE_SAMPLE v64, 8, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[36:43], s[32:35] ; F0800800 01094040 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v64, v64, v64 ; 06808140 V_ADD_F32_e32 v64, -1.000000e+00, v64 ; 068080F3 V_MUL_F32_e32 v64, v64, v85 ; 1080AB40 V_SUB_F32_e32 v64, v64, v73 ; 08809340 V_MUL_F32_e32 v65, v64, v64 ; 10828140 V_SUB_F32_e32 v65, v72, v65 ; 08828348 V_CMP_GE_F32_e32 vcc, v65, v68 ; 7C0C8941 V_CMP_U_F32_e64 s[0:1], v65, v65, 0, 0 ; D0100000 00028341 V_CNDMASK_B32_e64 v72, 0, -1, vcc, 0, 0, 0, 0 ; D2000048 01A98280 V_CNDMASK_B32_e64 v73, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000049 00018280 V_OR_B32_e32 v72, v72, v73 ; 38909348 V_CMP_NE_I32_e64 s[0:1], v72, 0, 0, 0 ; D10A0000 00010148 V_CNDMASK_B32_e64 v72, v86, v65, s[0:1], 0, 0, 0, 0 ; D2000048 00028356 V_RSQ_CLAMP_F32_e32 v72, v72 ; 7E905948 V_MUL_F32_e32 v73, v41, v93 ; 1092BB29 V_SUB_F32_e32 v73, v73, v98 ; 0892C549 V_MAD_F32 v73, v40, v87, v73, 0, 0 ; D2820049 0526AF28 V_MUL_F32_e32 v74, v41, v44 ; 10945929 V_SUB_F32_e32 v74, v74, v96 ; 0894C14A V_MAD_F32 v74, v40, v43, v74, 0, 0 ; D282004A 052A5728 V_ADD_F32_e32 v75, v73, v74 ; 06969549 V_MUL_F32_e32 v75, v75, v100 ; 1096C94B V_SUB_F32_e32 v60, v75, v60 ; 0878794B V_MUL_F32_e64 v75, v60, -v64, 0, 0 ; D210004B 4002813C V_MOV_B32_e32 v103, 0x80000000 ; 7ECE02FF 80000000 V_XOR_B32_e32 v64, v64, v103 ; 3A80CF40 V_MAD_F32 v64, v60, v64, v75, 0, 0 ; D2820040 052E813C V_SUB_F32_e32 v67, v101, v67 ; 08868765 V_MAD_F32 v64, v67, v69, v64, 0, 0 ; D2820040 05028B43 V_MUL_F32_e32 v64, v67, v64 ; 10808143 V_MUL_F32_e32 v60, v60, v60 ; 1078793C V_MAD_F32 v60, v60, v69, v64, 0, 0 ; D282003C 05028B3C V_MUL_F32_e32 v60, 5.000000e-01, v60 ; 107878F0 V_RCP_F32_e32 v64, v65 ; 7E805541 V_MUL_F32_e32 v67, v60, v64 ; 1086813C V_MUL_F32_e32 v67, -1.442700e+00, v67 ; 108686FF BFB8AA65 V_EXP_F32_e32 v67, v67 ; 7E864B43 V_MUL_F32_e32 v67, v72, v67 ; 10868748 V_MAD_F32 v60, v60, v64, v71, 0, 0 ; D282003C 051E813C V_CMP_GE_F32_e64 s[0:1], v60, 0.000000e+00, 0, 0 ; D00C0000 0001013C V_CNDMASK_B32_e64 v60, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003C 0001E480 V_MOV_B32_e32 v64, 0x80000000 ; 7E8002FF 80000000 V_XOR_B32_e32 v65, v65, v64 ; 3A828141 V_CMP_GE_F32_e64 s[0:1], v65, 0.000000e+00, 0, 0 ; D00C0000 00010141 V_CNDMASK_B32_e64 v65, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000041 0001E480 V_ADD_F32_e32 v60, v60, v65 ; 0678833C V_XOR_B32_e32 v60, v60, v64 ; 3A78813C V_CMP_GE_F32_e64 s[0:1], v60, 0.000000e+00, 0, 0 ; D00C0000 0001013C V_CNDMASK_B32_e64 v60, 0, v67, s[0:1], 0, 0, 0, 0 ; D200003C 00028680 V_MUL_F32_e32 v64, v102, v73 ; 10809366 V_MAD_F32 v64, v70, v99, v64, 0, 0 ; D2820040 0502C746 V_MAD_F32 v64, v84, v95, v64, 0, 0 ; D2820040 0502BF54 V_SUB_F32_e32 v64, 1.000000e+00, v64 ; 088080F2 V_MUL_F32_e32 v65, v64, v64 ; 10828140 V_MUL_F32_e32 v65, v65, v65 ; 10828341 V_MUL_F32_e32 v64, v64, v65 ; 10808340 V_MUL_F32_e32 v60, v60, v64 ; 1078813C V_ADD_F32_e64 v60, v60, 0, 1, 0 ; D206083C 0001013C V_MUL_F32_e32 v64, v74, v102 ; 1080CD4A V_MAD_F32 v61, v61, v84, v64, 0, 0 ; D282003D 0502A93D V_MAD_F32 v61, v97, v70, v61, 0, 0 ; D282003D 04F68D61 V_ADD_F32_e64 v61, v61, 0, 1, 0 ; D206083D 0001013D V_MAD_F32 v64, v60, v61, v79, 0, 0 ; D2820040 053E7B3C V_MUL_F32_e32 v64, v36, v64 ; 10808124 V_MAD_F32 v64, v66, v33, v64, 0, 0 ; D2820040 05024342 V_MUL_F32_e32 v66, v57, v14 ; 10841D39 V_MUL_F32_e32 v65, v57, v17 ; 10822339 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[20:27], s[6:7], 0x10 ; C0CA0710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[72:75], 15, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[20:27], s[0:3] ; F0800F00 00054841 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v57, v72, v72 ; 06729148 V_ADD_F32_e32 v57, -1.000000e+00, v57 ; 067272F3 V_MUL_F32_e32 v57, v57, v56 ; 10727139 V_ADD_F32_e32 v65, v73, v73 ; 06829349 V_ADD_F32_e32 v65, -1.000000e+00, v65 ; 068282F3 V_MUL_F32_e32 v56, v65, v56 ; 10707141 V_MUL_F32_e32 v65, v56, v56 ; 10827138 V_MAD_F32 v66, v57, v57, v65, 0, 0 ; D2820042 05067339 V_ADD_F32_e32 v66, 1.000000e+00, v66 ; 068484F2 V_RSQ_CLAMP_F32_e32 v66, v66 ; 7E845942 V_MUL_F32_e32 v67, v56, v66 ; 10868538 V_MUL_F32_e32 v69, v67, v40 ; 108A5143 V_MUL_F32_e32 v70, v57, v66 ; 108C8539 V_MUL_F32_e64 v79, v70, -v39, 0, 0 ; D210004F 40024F46 V_SUB_F32_e32 v69, v79, v69 ; 088A8B4F V_MAD_F32 v103, v66, v41, v69, 0, 0 ; D2820067 05165342 V_MUL_F32_e32 v69, v70, v40 ; 108A5146 V_MAD_F32 v69, v66, v40, v69, 0, 0 ; D2820045 05165142 V_MAD_F32 v102, v67, v41, v69, 0, 0 ; D2820066 05165343 V_MUL_F32_e32 v69, v67, v39 ; 108A4F43 V_MAD_F32 v69, v66, v39, v69, 0, 0 ; D2820045 05164F42 V_MAD_F32 v101, v70, v41, v69, 0, 0 ; D2820065 05165346 V_MAD_F32 v69, v40, v93, v92, 0, 0 ; D2820045 0572BB28 V_MUL_F32_e32 v79, v39, v87 ; 109EAF27 V_SUB_F32_e32 v69, v69, v79 ; 088A9F45 V_MUL_F32_e32 v79, v70, v69 ; 109E8B46 V_MAD_F32 v84, v41, v93, v98, 0, 0 ; D2820054 058ABB29 V_MUL_F32_e32 v92, v40, v87 ; 10B8AF28 V_SUB_F32_e32 v84, v84, v92 ; 08A8B954 V_MAD_F32 v79, v67, v84, v79, 0, 0 ; D282004F 053EA943 V_MAD_F32 v79, v66, v99, v79, 0, 0 ; D282004F 053EC742 V_SUB_F32_e32 v79, 1.000000e+00, v79 ; 089E9EF2 V_MUL_F32_e32 v92, v79, v79 ; 10B89F4F V_MUL_F32_e32 v104, v92, v92 ; 10D0B95C V_CUBESC_F32 v106, v101, v102, v103, 0, 0 ; D28A006A 059ECD65 V_CUBETC_F32 v105, v101, v102, v103, 0, 0 ; D28C0069 059ECD65 V_CUBEMA_F32 v107, v101, v102, v103, 0, 0 ; D28E006B 059ECD65 V_CUBEID_F32 v108, v101, v102, v103, 0, 0 ; D288006C 059ECD65 V_MOV_B32_e32 v92, 0x7fffffff ; 7EB802FF 7FFFFFFF V_AND_B32_e32 v92, v107, v92 ; 36B8B96B V_RCP_F32_e32 v92, v92 ; 7EB8555C V_MAD_F32 v107, v105, v92, v45, 0, 0 ; D282006B 04B6B969 V_MAD_F32 v106, v106, v92, v45, 0, 0 ; D282006A 04B6B96A IMAGE_SAMPLE v[105:107], 7, 0, 0, 0, 0, 0, 0, 0, v[106:109], s[12:19], s[8:11] ; F0800700 0043696A S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v92, v107 ; 7EB84F6B V_MUL_LEGACY_F32_e32 v92, 2.200000e+00, v92 ; 0EB8B8FF 400CCCCD V_EXP_F32_e32 v92, v92 ; 7EB84B5C V_MAD_F32 v83, v41, v55, v83, 0, 0 ; D2820053 054E6F29 V_SUB_F32_e32 v58, v58, v83 ; 0874A73A V_MUL_F32_e32 v58, v58, v67 ; 1074873A V_MAD_F32 v55, v40, v55, v80, 0, 0 ; D2820037 05426F28 V_SUB_F32_e32 v55, v55, v81 ; 086EA337 V_MUL_F32_e32 v55, v55, v70 ; 106E8D37 V_SUB_F32_e32 v55, v55, v58 ; 086E7537 V_MAD_F32 v55, v48, v66, v55, 0, 0 ; D2820037 04DE8530 V_ADD_F32_e64 v55, v55, 0, 1, 0 ; D2060837 00010137 V_ADD_F32_e32 v58, v55, v92 ; 0674B937 V_MUL_F32_e32 v58, v63, v58 ; 1074753F V_MUL_F32_e32 v80, v75, v85 ; 10A0AB4B V_SUB_F32_e32 v65, v80, v65 ; 08828350 V_ADD_F32_e32 v65, v62, v65 ; 0682833E V_MUL_F32_e32 v80, v74, v85 ; 10A0AB4A V_MUL_F32_e32 v81, v57, v57 ; 10A27339 V_SUB_F32_e32 v80, v80, v81 ; 08A0A350 V_ADD_F32_e32 v62, v62, v80 ; 067CA13E V_MUL_F32_e32 v62, v62, v65 ; 107C833E V_MUL_F32_e32 v80, v56, v57 ; 10A07338 V_ADD_F32_e32 v72, v75, v75 ; 0690974B V_ADD_F32_e32 v72, -1.000000e+00, v72 ; 069090F3 V_MUL_F32_e32 v72, v72, v85 ; 1090AB48 V_SUB_F32_e32 v72, v72, v80 ; 0890A148 V_MUL_F32_e32 v73, v72, v72 ; 10929148 V_SUB_F32_e32 v62, v62, v73 ; 087C933E V_CMP_GE_F32_e32 vcc, v62, v68 ; 7C0C893E V_CMP_U_F32_e64 s[0:1], v62, v62, 0, 0 ; D0100000 00027D3E V_CNDMASK_B32_e64 v73, 0, -1, vcc, 0, 0, 0, 0 ; D2000049 01A98280 V_CNDMASK_B32_e64 v74, 0, -1, s[0:1], 0, 0, 0, 0 ; D200004A 00018280 V_OR_B32_e32 v73, v73, v74 ; 38929549 V_CMP_NE_I32_e64 s[0:1], v73, 0, 0, 0 ; D10A0000 00010149 V_CNDMASK_B32_e64 v73, v86, v62, s[0:1], 0, 0, 0, 0 ; D2000049 00027D56 V_RSQ_CLAMP_F32_e32 v73, v73 ; 7E925949 V_MAD_F32 v74, v40, v44, v94, 0, 0 ; D282004A 057A5928 V_MUL_F32_e32 v75, v39, v43 ; 10965727 V_SUB_F32_e32 v74, v74, v75 ; 0894974A V_ADD_F32_e32 v69, v69, v74 ; 068A9545 V_MUL_F32_e32 v69, v69, v100 ; 108AC945 V_SUB_F32_e32 v57, v69, v57 ; 08727345 V_MUL_F32_e64 v69, v57, -v72, 0, 0 ; D2100045 40029139 V_MOV_B32_e32 v75, 0x80000000 ; 7E9602FF 80000000 V_XOR_B32_e32 v72, v72, v75 ; 3A909748 V_MAD_F32 v69, v57, v72, v69, 0, 0 ; D2820045 05169139 V_MAD_F32 v44, v41, v44, v96, 0, 0 ; D282002C 05825929 V_MUL_F32_e32 v43, v40, v43 ; 10565728 V_SUB_F32_e32 v43, v44, v43 ; 0856572C V_ADD_F32_e32 v44, v84, v43 ; 06585754 V_MUL_F32_e32 v44, v44, v100 ; 1058C92C V_SUB_F32_e32 v44, v44, v56 ; 0858712C V_MAD_F32 v56, v44, v65, v69, 0, 0 ; D2820038 0516832C V_MUL_F32_e32 v44, v44, v56 ; 1058712C V_MUL_F32_e32 v56, v57, v57 ; 10707339 V_MAD_F32 v44, v56, v65, v44, 0, 0 ; D282002C 04B28338 V_MUL_F32_e32 v44, 5.000000e-01, v44 ; 105858F0 V_RCP_F32_e32 v56, v62 ; 7E70553E V_MUL_F32_e32 v57, v44, v56 ; 1072712C V_MUL_F32_e32 v57, -1.442700e+00, v57 ; 107272FF BFB8AA65 V_EXP_F32_e32 v57, v57 ; 7E724B39 V_MUL_F32_e32 v57, v73, v57 ; 10727349 V_MAD_F32 v44, v44, v56, v71, 0, 0 ; D282002C 051E712C V_CMP_GE_F32_e64 s[0:1], v44, 0.000000e+00, 0, 0 ; D00C0000 0001012C V_CNDMASK_B32_e64 v44, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200002C 0001E480 V_MOV_B32_e32 v56, 0x80000000 ; 7E7002FF 80000000 V_XOR_B32_e32 v62, v62, v56 ; 3A7C713E V_CMP_GE_F32_e64 s[0:1], v62, 0.000000e+00, 0, 0 ; D00C0000 0001013E V_CNDMASK_B32_e64 v62, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D200003E 0001E480 V_ADD_F32_e32 v44, v44, v62 ; 06587D2C V_XOR_B32_e32 v44, v44, v56 ; 3A58712C V_CMP_GE_F32_e64 s[0:1], v44, 0.000000e+00, 0, 0 ; D00C0000 0001012C V_CNDMASK_B32_e64 v44, 0, v57, s[0:1], 0, 0, 0, 0 ; D200002C 00027280 V_MUL_F32_e32 v56, v79, v104 ; 1070D14F V_MUL_F32_e32 v44, v44, v56 ; 1058712C V_ADD_F32_e64 v44, v44, 0, 1, 0 ; D206082C 0001012C V_MUL_F32_e32 v56, v74, v70 ; 10708D4A V_MAD_F32 v43, v43, v67, v56, 0, 0 ; D282002B 04E2872B V_MAD_F32 v43, v97, v66, v43, 0, 0 ; D282002B 04AE8561 V_ADD_F32_e64 v43, v43, 0, 1, 0 ; D206082B 0001012B V_MAD_F32 v56, v44, v43, v58, 0, 0 ; D2820038 04EA572C V_MAD_F32 v56, v56, v38, v64, 0, 0 ; D2820038 05024D38 V_MOV_B32_e32 v42, v91 ; 7E54035B V_CUBESC_F32 v95, v39, v40, v41, 0, 0 ; D28A005F 04A65127 V_CUBETC_F32 v94, v39, v40, v41, 0, 0 ; D28C005E 04A65127 V_CUBEMA_F32 v96, v39, v40, v41, 0, 0 ; D28E0060 04A65127 V_CUBEID_F32 v97, v39, v40, v41, 0, 0 ; D2880061 04A65127 V_MOV_B32_e32 v39, 0x7fffffff ; 7E4E02FF 7FFFFFFF V_AND_B32_e32 v39, v96, v39 ; 364E4F60 V_RCP_F32_e32 v39, v39 ; 7E4E5527 V_MAD_F32 v96, v94, v39, v45, 0, 0 ; D2820060 04B64F5E V_MAD_F32 v95, v95, v39, v45, 0, 0 ; D282005F 04B64F5F IMAGE_SAMPLE v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[95:98], s[12:19], s[8:11] ; F0800700 0043275F S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v42, v41 ; 7E544F29 V_MUL_LEGACY_F32_e32 v42, 2.200000e+00, v42 ; 0E5454FF 400CCCCD V_EXP_F32_e32 v42, v42 ; 7E544B2A V_ADD_F32_e64 v45, v48, 0, 1, 0 ; D206082D 00010130 V_ADD_F32_e32 v42, v42, v45 ; 06545B2A V_LOG_F32_e32 v48, v49 ; 7E604F31 V_MUL_LEGACY_F32_e32 v48, 2.200000e+00, v48 ; 0E6060FF 400CCCCD V_EXP_F32_e32 v48, v48 ; 7E604B30 V_MUL_F32_e32 v48, v36, v48 ; 10606124 V_LOG_F32_e32 v57, v52 ; 7E724F34 V_MUL_LEGACY_F32_e32 v57, 2.200000e+00, v57 ; 0E7272FF 400CCCCD V_EXP_F32_e32 v57, v57 ; 7E724B39 V_MAD_F32 v48, v57, v33, v48, 0, 0 ; D2820030 04C24339 V_LOG_F32_e32 v57, v88 ; 7E724F58 V_MUL_LEGACY_F32_e32 v57, 2.200000e+00, v57 ; 0E7272FF 400CCCCD V_EXP_F32_e32 v57, v57 ; 7E724B39 V_MAD_F32 v48, v57, v38, v48, 0, 0 ; D2820030 04C24D39 V_MUL_F32_e32 v57, 3.000000e-01, v48 ; 107260FF 3E99999A V_LOG_F32_e32 v49, v50 ; 7E624F32 V_MUL_LEGACY_F32_e32 v49, 2.200000e+00, v49 ; 0E6262FF 400CCCCD V_EXP_F32_e32 v49, v49 ; 7E624B31 V_MUL_F32_e32 v49, v36, v49 ; 10626324 V_LOG_F32_e32 v50, v53 ; 7E644F35 V_MUL_LEGACY_F32_e32 v50, 2.200000e+00, v50 ; 0E6464FF 400CCCCD V_EXP_F32_e32 v50, v50 ; 7E644B32 V_MAD_F32 v49, v50, v33, v49, 0, 0 ; D2820031 04C64332 V_LOG_F32_e32 v50, v89 ; 7E644F59 V_MUL_LEGACY_F32_e32 v50, 2.200000e+00, v50 ; 0E6464FF 400CCCCD V_EXP_F32_e32 v50, v50 ; 7E644B32 V_MAD_F32 v49, v50, v38, v49, 0, 0 ; D2820031 04C64D32 V_MUL_F32_e32 v50, -5.900000e-01, v49 ; 106462FF BF170A3D V_SUB_F32_e32 v50, v50, v57 ; 08647332 V_MUL_F32_e32 v51, 1.100000e-01, v63 ; 10667EFF 3DE147AE V_SUB_F32_e32 v50, v50, v51 ; 08646732 V_ADD_F32_e32 v50, 1.000000e+00, v50 ; 066464F2 V_MUL_F32_e32 v37, v50, v37 ; 104A4B32 V_MAD_F32 v37, v37, v42, v56, 0, 0 ; D2820025 04E25525 V_SUB_F32_e32 v35, v35, v34 ; 08464523 V_SUB_F32_e32 v42, v21, v12 ; 08541915 V_MUL_F32_e32 v32, v42, v32 ; 1040412A V_ADD_F32_e64 v32, v32, 0, 1, 0 ; D2060820 00010120 V_MAD_F32 v34, v32, v35, v34, 0, 0 ; D2820022 048A4720 V_MUL_F32_e32 v34, v37, v34 ; 10444525 V_MOV_B32_e32 v35, 0x80000000 ; 7E4602FF 80000000 V_XOR_B32_e32 v35, v87, v35 ; 3A464757 V_RCP_F32_e32 v35, v35 ; 7E465523 V_MUL_F32_e32 v35, v42, v35 ; 1046472A V_MUL_F32_e32 v42, v35, v93 ; 1054BB23 V_MUL_F32_e32 v51, v35, v59 ; 10667723 V_MUL_F32_e32 v51, v51, v51 ; 10666733 V_MAD_F32 v42, v42, v42, v51, 0, 0 ; D282002A 04CE552A V_MUL_F32_e32 v35, v35, v87 ; 1046AF23 V_MAD_F32 v35, v35, v35, v42, 0, 0 ; D2820023 04AA4723 V_CMP_GE_F32_e32 vcc, v35, v68 ; 7C0C8923 V_CMP_U_F32_e64 s[0:1], v35, v35, 0, 0 ; D0100000 00024723 V_CNDMASK_B32_e64 v42, 0, -1, vcc, 0, 0, 0, 0 ; D200002A 01A98280 V_CNDMASK_B32_e64 v51, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000033 00018280 V_OR_B32_e32 v42, v42, v51 ; 3854672A V_CMP_NE_I32_e64 s[0:1], v42, 0, 0, 0 ; D10A0000 0001012A V_CNDMASK_B32_e64 v35, v86, v35, s[0:1], 0, 0, 0, 0 ; D2000023 00024756 V_RSQ_CLAMP_F32_e32 v42, v35 ; 7E545923 V_MUL_F32_e32 v42, v42, v35 ; 1054472A V_MOV_B32_e32 v51, 0x80000000 ; 7E6602FF 80000000 V_XOR_B32_e32 v35, v35, v51 ; 3A466723 V_CMP_GT_F32_e32 vcc, 0, v35 ; 7C084680 V_CNDMASK_B32_e64 v35, 0.000000e+00, v42, vcc, 0, 0, 0, 0 ; D2000023 01AA5480 V_XOR_B32_e32 v35, v35, v51 ; 3A466723 V_MUL_F32_e32 v24, v35, v24 ; 10303123 V_MUL_F32_e32 v24, 1.442700e+00, v24 ; 103030FF 3FB8AA65 V_EXP_F32_e32 v24, v24 ; 7E304B18 V_MUL_F32_e32 v35, v87, v87 ; 1046AF57 V_MUL_F32_e32 v35, v35, v35 ; 10464723 V_MUL_F32_e32 v35, v87, v35 ; 10464757 V_SUBREV_F32_e32 v35, 1.000000e+00, v35 ; 0A4646F2 V_ADD_F32_e32 v35, 1.000000e+00, v35 ; 064646F2 V_MUL_F32_e32 v24, v24, v35 ; 10304718 V_MUL_F32_e32 v34, v24, v34 ; 10444518 V_SUB_F32_e32 v14, v14, v30 ; 081C3D0E V_SUB_F32_e32 v17, v17, v29 ; 08223B11 V_MUL_F32_e32 v17, v17, v17 ; 10222311 V_MAD_F32 v14, v14, v14, v17, 0, 0 ; D282000E 04461D0E V_SUB_F32_e32 v17, v12, v28 ; 0822390C V_MAD_F32 v14, v17, v17, v14, 0, 0 ; D282000E 043A2311 V_MUL_F32_e32 v14, v14, v23 ; 101C2F0E V_MUL_F32_e32 v22, v17, v22 ; 102C2D11 V_MUL_F32_e32 v22, 1.442700e+00, v22 ; 102C2CFF 3FB8AA65 V_EXP_F32_e32 v22, v22 ; 7E2C4B16 V_SUB_F32_e32 v22, 1.000000e+00, v22 ; 082C2CF2 V_MUL_F32_e32 v14, v22, v14 ; 101C1D16 V_RCP_F32_e32 v17, v17 ; 7E225511 V_MUL_F32_e32 v14, v17, v14 ; 101C1D11 V_MUL_F32_e32 v14, 1.442700e+00, v14 ; 101C1CFF 3FB8AA65 V_EXP_F32_e32 v14, v14 ; 7E1C4B0E V_ADD_F32_e64 v14, v14, 0, 1, 0 ; D206080E 0001010E V_SUB_F32_e32 v14, 1.000000e+00, v14 ; 081C1CF2 V_MAD_F32 v19, v9, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820013 03C1E109 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x30 ; C0C40730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v9, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[8:15], s[0:3] ; F0800100 00020913 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v9, v14, v9 ; 1012130E V_SUB_F32_e32 v14, 1.000000e+00, v9 ; 081C12F2 V_MUL_F32_e32 v17, v14, v34 ; 1022450E V_SUB_F32_e32 v12, v12, v21 ; 08182B0C V_CMP_GE_F32_e64 s[0:1], v12, 0.000000e+00, 0, 0 ; D00C0000 0001010C V_CNDMASK_B32_e64 v12, v17, v37, s[0:1], 0, 0, 0, 0 ; D200000C 00024B11 V_MOV_B32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF V_AND_B32_e32 v12, v12, v17 ; 3618230C V_LOG_F32_e32 v12, v12 ; 7E184F0C V_MUL_F32_e32 v12, 4.545450e-01, v12 ; 101818FF 3EE8BA1F V_EXP_F32_e32 v12, v12 ; 7E184B0C S_LOAD_DWORDX4 s[8:11], s[4:5], 0x20 ; C0840520 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x40 ; C0C60740 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800700 00431306 V_SUB_F32_e32 v6, 1.000000e+00, v2 ; 080C04F2 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v7, v6, v21 ; 100E2B06 V_MAD_F32 v53, v2, v12, v7, 0, 0 ; D2820035 041E1902 V_LOG_F32_e32 v7, v26 ; 7E0E4F1A V_MUL_LEGACY_F32_e32 v7, 2.200000e+00, v7 ; 0E0E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_ADD_F32_e32 v7, v31, v7 ; 060E0F1F V_MUL_F32_e32 v7, v49, v7 ; 100E0F31 V_MAD_F32 v7, v46, v47, v7, 0, 0 ; D2820007 041E5F2E V_LOG_F32_e32 v12, v77 ; 7E184F4D V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v82, v12 ; 06181952 V_MUL_F32_e32 v12, v49, v12 ; 10181931 V_MAD_F32 v12, v60, v61, v12, 0, 0 ; D282000C 04327B3C V_MUL_F32_e32 v12, v36, v12 ; 10181924 V_MAD_F32 v7, v7, v33, v12, 0, 0 ; D2820007 04324307 V_LOG_F32_e32 v12, v106 ; 7E184F6A V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v55, v12 ; 06181937 V_MUL_F32_e32 v12, v49, v12 ; 10181931 V_MAD_F32 v12, v44, v43, v12, 0, 0 ; D282000C 0432572C V_MAD_F32 v7, v12, v38, v7, 0, 0 ; D2820007 041E4D0C V_LOG_F32_e32 v12, v40 ; 7E184F28 V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v12, v45 ; 06185B0C V_MUL_F32_e32 v17, v50, v18 ; 10222532 V_MAD_F32 v7, v17, v12, v7, 0, 0 ; D2820007 041E1911 V_SUB_F32_e32 v12, v15, v16 ; 0818210F V_MAD_F32 v12, v32, v12, v16, 0, 0 ; D282000C 04421920 V_MUL_F32_e32 v12, v7, v12 ; 10181907 V_MUL_F32_e32 v12, v24, v12 ; 10181918 V_MUL_F32_e32 v12, v14, v12 ; 1018190E V_CNDMASK_B32_e64 v7, v12, v7, s[0:1], 0, 0, 0, 0 ; D2000007 00020F0C V_MOV_B32_e32 v12, 0x7fffffff ; 7E1802FF 7FFFFFFF V_AND_B32_e32 v7, v7, v12 ; 360E1907 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 4.545450e-01, v7 ; 100E0EFF 3EE8BA1F V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_MUL_F32_e32 v12, v6, v20 ; 10182906 V_MAD_F32 v52, v2, v7, v12, 0, 0 ; D2820034 04320F02 V_LOG_F32_e32 v7, v25 ; 7E0E4F19 V_MUL_LEGACY_F32_e32 v7, 2.200000e+00, v7 ; 0E0E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_ADD_F32_e32 v7, v31, v7 ; 060E0F1F V_MUL_F32_e32 v7, v48, v7 ; 100E0F30 V_MAD_F32 v7, v46, v47, v7, 0, 0 ; D2820007 041E5F2E V_LOG_F32_e32 v12, v76 ; 7E184F4C V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v82, v12 ; 06181952 V_MUL_F32_e32 v12, v48, v12 ; 10181930 V_MAD_F32 v12, v60, v61, v12, 0, 0 ; D282000C 04327B3C V_MUL_F32_e32 v12, v36, v12 ; 10181924 V_MAD_F32 v7, v7, v33, v12, 0, 0 ; D2820007 04324307 V_LOG_F32_e32 v12, v105 ; 7E184F69 V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v55, v12 ; 06181937 V_MUL_F32_e32 v12, v48, v12 ; 10181930 V_MAD_F32 v12, v44, v43, v12, 0, 0 ; D282000C 0432572C V_MAD_F32 v7, v12, v38, v7, 0, 0 ; D2820007 041E4D0C V_LOG_F32_e32 v12, v39 ; 7E184F27 V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_ADD_F32_e32 v12, v12, v45 ; 06185B0C V_MUL_F32_e32 v13, v50, v13 ; 101A1B32 V_MAD_F32 v7, v13, v12, v7, 0, 0 ; D2820007 041E190D V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MAD_F32 v10, v32, v10, v11, 0, 0 ; D282000A 042E1520 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MUL_F32_e32 v10, v24, v10 ; 10141518 V_MUL_F32_e32 v10, v14, v10 ; 1014150E V_CNDMASK_B32_e64 v7, v10, v7, s[0:1], 0, 0, 0, 0 ; D2000007 00020F0A V_MOV_B32_e32 v10, 0x7fffffff ; 7E1402FF 7FFFFFFF V_AND_B32_e32 v7, v7, v10 ; 360E1507 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_F32_e32 v7, 4.545450e-01, v7 ; 100E0EFF 3EE8BA1F V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_MUL_F32_e32 v6, v6, v19 ; 100C2706 V_MAD_F32 v51, v2, v7, v6, 0, 0 ; D2820033 041A0F02 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x24 ; C0840524 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x48 ; C0C60748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[12:19], s[8:11] ; F0800700 00430A33 V_MUL_F32_e32 v6, v4, v2 ; 100C0504 V_SUB_F32_e32 v2, v2, v6 ; 08040D02 V_SUB_F32_e32 v3, 1.000000e+00, v2 ; 080604F2 V_MUL_F32_e32 v4, v3, v52 ; 10086903 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v2, v11, v4, 0, 0 ; D2820004 04121702 V_SUB_F32_e32 v5, v8, v4 ; 080A0908 V_CNDMASK_B32_e64 v6, 0, v9, s[0:1], 0, 0, 0, 0 ; D2000006 00021280 V_MAD_F32 v4, v6, v5, v4, 0, 0 ; D2820004 04120B06 V_MUL_F32_e32 v5, v3, v51 ; 100A6703 V_MAD_F32 v5, v2, v10, v5, 0, 0 ; D2820005 04161502 V_SUB_F32_e32 v1, v1, v5 ; 08020B01 V_MAD_F32 v1, v6, v1, v5, 0, 0 ; D2820001 04160306 V_CVT_PKRTZ_F16_F32_e32 v1, v1, v4 ; 5E020901 V_MUL_F32_e32 v3, v3, v53 ; 10066B03 V_MAD_F32 v2, v2, v12, v3, 0, 0 ; D2820002 040E1902 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_MAD_F32 v0, v6, v0, v2, 0, 0 ; D2820000 040A0106 V_CNDMASK_B32_e64 v2, 0, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000002 0001E480 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 18: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 19: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 20: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 21: MOV TEMP[0], TEMP[2] 22: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 23: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 24: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 25: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 26: MAD TEMP[1], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 27: MOV OUT[1], TEMP[1] 28: MOV OUT[0], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = fadd float %66, %78 %81 = fadd float %67, %79 %82 = fadd float %80, %68 %83 = fadd float %81, %69 %84 = fmul float %80, %45 %85 = fmul float %81, %46 %86 = fadd float %82, 5.000000e-01 %87 = fadd float %83, 5.000000e-01 %88 = fmul float %86, %48 %89 = fmul float %87, %49 %90 = bitcast float %88 to i32 %91 = bitcast float %89 to i32 %92 = bitcast float 0.000000e+00 to i32 %93 = insertelement <4 x i32> undef, i32 %90, i32 0 %94 = insertelement <4 x i32> %93, i32 %91, i32 1 %95 = insertelement <4 x i32> %94, i32 %92, i32 2 %96 = insertelement <4 x i32> %95, i32 undef, i32 3 %97 = bitcast <8 x i32> %71 to <32 x i8> %98 = bitcast <4 x i32> %73 to <16 x i8> %99 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 3 %101 = fmul float %100, %47 %102 = fmul float %101, 2.550000e+02 %103 = fmul float %85, %54 %104 = fmul float %85, %55 %105 = fmul float %85, %56 %106 = fmul float %85, %57 %107 = fmul float %84, %50 %108 = fadd float %107, %103 %109 = fmul float %84, %51 %110 = fadd float %109, %104 %111 = fmul float %84, %52 %112 = fadd float %111, %105 %113 = fmul float %84, %53 %114 = fadd float %113, %106 %115 = fmul float %102, %58 %116 = fadd float %115, %108 %117 = fmul float %102, %59 %118 = fadd float %117, %110 %119 = fmul float %102, %60 %120 = fadd float %119, %112 %121 = fmul float %102, %61 %122 = fadd float %121, %114 %123 = fadd float %116, %62 %124 = fadd float %118, %63 %125 = fadd float %120, %64 %126 = fadd float %122, %65 %127 = fmul float %124, %17 %128 = fmul float %124, %18 %129 = fmul float %124, %19 %130 = fmul float %124, %20 %131 = fmul float %123, %13 %132 = fadd float %131, %127 %133 = fmul float %123, %14 %134 = fadd float %133, %128 %135 = fmul float %123, %15 %136 = fadd float %135, %129 %137 = fmul float %123, %16 %138 = fadd float %137, %130 %139 = fmul float %125, %21 %140 = fadd float %139, %132 %141 = fmul float %125, %22 %142 = fadd float %141, %134 %143 = fmul float %125, %23 %144 = fadd float %143, %136 %145 = fmul float %125, %24 %146 = fadd float %145, %138 %147 = fmul float %126, %25 %148 = fadd float %147, %140 %149 = fmul float %126, %26 %150 = fadd float %149, %142 %151 = fmul float %126, %27 %152 = fadd float %151, %144 %153 = fmul float %126, %28 %154 = fadd float %153, %146 %155 = fmul float %124, %33 %156 = fmul float %124, %34 %157 = fmul float %124, %35 %158 = fmul float %124, %36 %159 = fmul float %123, %29 %160 = fadd float %159, %155 %161 = fmul float %123, %30 %162 = fadd float %161, %156 %163 = fmul float %123, %31 %164 = fadd float %163, %157 %165 = fmul float %123, %32 %166 = fadd float %165, %158 %167 = fmul float %125, %37 %168 = fadd float %167, %160 %169 = fmul float %125, %38 %170 = fadd float %169, %162 %171 = fmul float %125, %39 %172 = fadd float %171, %164 %173 = fmul float %125, %40 %174 = fadd float %173, %166 %175 = fmul float %126, %41 %176 = fadd float %175, %168 %177 = fmul float %126, %42 %178 = fadd float %177, %170 %179 = fmul float %126, %43 %180 = fadd float %179, %172 %181 = fmul float %126, %44 %182 = fadd float %181, %174 %183 = fmul float %176, 5.000000e-01 %184 = fadd float %183, 5.000000e-01 %185 = fmul float %178, -5.000000e-01 %186 = fadd float %185, 5.000000e-01 %187 = fmul float %180, 1.000000e+00 %188 = fadd float %187, 0.000000e+00 %189 = fmul float %182, 1.000000e+00 %190 = fadd float %189, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %184, float %186, float %188, float %190) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %148, float %150, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[8:11], s[8:9], 0x0 ; C0840900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[8:11][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80020000 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3c ; C204013C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s8, v0 ; 06080008 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x24 ; C2040124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s8, v4 ; 100A0808 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3d ; C204013D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s8, v1 ; 06000208 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x25 ; C2040125 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s8, v0 ; 10020008 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x30 ; C2040130 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s8, v1 ; 10040208 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x2c ; C204012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v5, s8, v2, 0, 0 ; D2820002 04081105 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3f ; C204013F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s8, v0 ; 06000008 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x29 ; C2040129 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s8, v0 ; 100E0008 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3e ; C204013E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s8, v4 ; 06000808 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x28 ; C2040128 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s8, v0 ; 100C0008 V_MOV_B32_e32 v8, 0 ; 7E100280 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x0 ; C0840500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[12:19], s[8:11] ; F0900800 00430006 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x26 ; C2020126 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MUL_F32_e32 v0, s4, v0 ; 10000004 V_MUL_F32_e32 v0, 2.550000e+02, v0 ; 100000FF 437F0000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x34 ; C2020134 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x38 ; C2020138 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x31 ; C2020131 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v1 ; 10060204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2d ; C202012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v5, s4, v3, 0, 0 ; D2820003 040C0905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x35 ; C2020135 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x39 ; C2020139 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v3 ; 10080604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v2, s4, v4, 0, 0 ; D2820004 04100902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x32 ; C2020132 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v1 ; 100C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2e ; C202012E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v5, s4, v6, 0, 0 ; D2820006 04180905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x36 ; C2020136 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, v0, s4, v6, 0, 0 ; D2820006 04180900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3a ; C202013A S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s4, v6 ; 060C0C04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v6, s4, v4, 0, 0 ; D2820004 04100906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x33 ; C2020133 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2f ; C202012F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v5, s4, v1, 0, 0 ; D2820001 04040905 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x37 ; C2020137 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v1, 0, 0 ; D2820000 04040900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3b ; C202013B S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1f ; C202011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v4, 0, 0 ; D2820001 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v3 ; 10080604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v2, s4, v4, 0, 0 ; D2820004 04100902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v6, s4, v4, 0, 0 ; D2820004 04100906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1e ; C202011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s4, v4, 0, 0 ; D2820004 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v2, s4, v5, 0, 0 ; D2820005 04140902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v6, s4, v5, 0, 0 ; D2820005 04140906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v0, s4, v5, 0, 0 ; D2820005 04140900 V_MAD_F32 v5, v5, -5.000000e-01, 5.000000e-01, 0, 0 ; D2820005 03C1E305 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v3 ; 100E0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v2, s4, v7, 0, 0 ; D2820007 041C0902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v6, s4, v7, 0, 0 ; D2820007 041C0906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v0, s4, v7, 0, 0 ; D2820007 041C0900 V_MAD_F32 v7, v7, 5.000000e-01, 5.000000e-01, 0, 0 ; D2820007 03C1E107 EXP 15, 32, 0, 0, 0, v7, v5, v4, v1 ; F800020F 01040507 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v1, s4, v3 ; 10020604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v2, s4, v1, 0, 0 ; D2820001 04040902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v6, s4, v1, 0, 0 ; D2820001 04040906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v0, s4, v1, 0, 0 ; D2820001 04040900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s4, v3 ; 10080604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v2, s4, v4, 0, 0 ; D2820004 04100902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v6, s4, v4, 0, 0 ; D2820004 04100906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s4, v4, 0, 0 ; D2820004 04100900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v2, s4, v5, 0, 0 ; D2820005 04140902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v6, s4, v5, 0, 0 ; D2820005 04140906 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v0, s4, v5, 0, 0 ; D2820005 04140900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v3 ; 10060604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v2, s4, v3, 0, 0 ; D2820002 040C0902 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v6, s4, v2, 0, 0 ; D2820002 04080906 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s0, v2, 0, 0 ; D2820000 04080100 EXP 15, 12, 0, 1, 0, v0, v5, v4, v1 ; F80008CF 01040500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = fmul float %26, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %27, -1.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = bitcast float %29 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_SUB_F32_e32 v3, 1.000000e+00, v4 ; 080608F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..254] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: MOV TEMP[0].xyz, CONST[ADDR[0].x+7].xyzx 3: MAD TEMP[1].xy, CONST[6], IN[1].zwzw, TEMP[0] 4: MOV TEMP[1].xy, TEMP[1].xyxx 5: ADD TEMP[2].xy, TEMP[0].zzzz, -CONST[5].xzzw 6: MOV TEMP[0].xy, TEMP[2].xyxx 7: MUL TEMP[2].z, TEMP[0].zzzz, CONST[4].zzzz 8: MOV TEMP[0].z, TEMP[2].zzzz 9: LRP TEMP[2].x, TEMP[2].zzzz, CONST[4].yyyy, CONST[4].xxxx 10: MUL TEMP[3], TEMP[0], CONST[5].ywzw 11: MOV_SAT TEMP[3], TEMP[3] 12: ADD TEMP[4].y, -TEMP[3].yyyy, IMM[0].xxxx 13: MUL TEMP[3].xy, TEMP[4].yyyy, TEMP[3].xxxx 14: MOV TEMP[3].xy, TEMP[3].xyxx 15: MAD TEMP[4].xy, IN[1], IMM[0].yyyy, IMM[0].zzzz 16: MOV TEMP[0].xy, TEMP[4].xyxx 17: MAD TEMP[4].xy, TEMP[0], -TEMP[2].xxxx, IN[0] 18: MUL TEMP[2], TEMP[4].yyyy, CONST[1] 19: MAD TEMP[0], TEMP[4].xxxx, CONST[0], TEMP[2] 20: MAD TEMP[0], CONST[4].wwww, CONST[2], TEMP[0] 21: ADD TEMP[0], TEMP[0], CONST[3] 22: MOV TEMP[1].zw, IMM[0].xxwx 23: MOV TEMP[3].zw, IMM[0].xxwx 24: MOV OUT[1], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: MOV OUT[2], TEMP[3] 27: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = fptosi float %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = shl i32 %60, 4 %62 = add i32 %61, 112 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %60, 4 %65 = add i32 %64, 116 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %60, 4 %68 = add i32 %67, 120 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = fmul float %37, %51 %71 = fadd float %70, %63 %72 = fmul float %38, %52 %73 = fadd float %72, %66 %74 = fsub float -0.000000e+00, %33 %75 = fadd float %69, %74 %76 = fsub float -0.000000e+00, %35 %77 = fadd float %69, %76 %78 = fmul float %69, %31 %79 = call float @llvm.AMDGPU.lrp(float %78, float %30, float %29) %80 = fmul float %75, %34 %81 = fmul float %77, %36 %82 = fmul float %78, %35 %83 = fmul float 0.000000e+00, %36 %84 = call float @llvm.AMDIL.clamp.(float %80, float 0.000000e+00, float 1.000000e+00) %85 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) %87 = call float @llvm.AMDIL.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) %88 = fsub float -0.000000e+00, %85 %89 = fadd float %88, 1.000000e+00 %90 = fmul float %89, %84 %91 = fmul float %89, %84 %92 = fmul float %49, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %50, 2.000000e+00 %95 = fadd float %94, -1.000000e+00 %96 = fsub float -0.000000e+00, %79 %97 = fmul float %93, %96 %98 = fadd float %97, %43 %99 = fsub float -0.000000e+00, %79 %100 = fmul float %95, %99 %101 = fadd float %100, %44 %102 = fmul float %101, %17 %103 = fmul float %101, %18 %104 = fmul float %101, %19 %105 = fmul float %101, %20 %106 = fmul float %98, %13 %107 = fadd float %106, %102 %108 = fmul float %98, %14 %109 = fadd float %108, %103 %110 = fmul float %98, %15 %111 = fadd float %110, %104 %112 = fmul float %98, %16 %113 = fadd float %112, %105 %114 = fmul float %32, %21 %115 = fadd float %114, %107 %116 = fmul float %32, %22 %117 = fadd float %116, %109 %118 = fmul float %32, %23 %119 = fadd float %118, %111 %120 = fmul float %32, %24 %121 = fadd float %120, %113 %122 = fadd float %115, %25 %123 = fadd float %117, %26 %124 = fadd float %119, %27 %125 = fadd float %121, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %71, float %73, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %91, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %122, float %123, float %124, float %125) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v2, 0x74, v1 ; 4A0402FF 00000074 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[3:6], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010300 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MAD_F32 v2, s4, v6, v2, 0, 0 ; D2820002 040A0C04 V_ADD_I32_e32 v7, 0x70, v1 ; 4A0E02FF 00000070 BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MAD_F32 v7, s4, v5, v7, 0, 0 ; D2820007 041E0A04 V_MOV_B32_e32 v8, 1.000000e+00 ; 7E1002F2 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 32, 0, 0, 0, v7, v2, v9, v8 ; F800020F 08090207 V_ADD_I32_e32 v1, 0x78, v1 ; 4A0202FF 00000078 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 V_SUBREV_F32_e32 v2, s4, v1 ; 0A040204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v2 ; 10040404 V_ADD_F32_e64 v2, v2, 0, 1, 0 ; D2060802 00010102 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v7, s4, v1 ; 0A0E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v7 ; 100E0E04 V_ADD_F32_e64 v7, v7, 0, 1, 0 ; D2060807 00010107 V_SUB_F32_e32 v7, 1.000000e+00, v7 ; 080E0EF2 V_MUL_F32_e32 v2, v7, v2 ; 10040507 EXP 15, 33, 0, 0, 0, v2, v2, v9, v8 ; F800021F 08090202 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v1, s4, v1 ; 10020204 V_SUB_F32_e32 v2, 1.000000e+00, v1 ; 080402F2 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v2 ; 10040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v1, s4, v2, 0, 0 ; D2820001 04080901 V_ADD_F32_e32 v2, v3, v3 ; 06040703 V_ADD_F32_e32 v2, -1.000000e+00, v2 ; 060404F3 V_MUL_F32_e32 v2, v2, v1 ; 10040302 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[7:10], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010700 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v0, v7, v2 ; 08000507 V_ADD_F32_e32 v2, v4, v4 ; 06040904 V_ADD_F32_e32 v2, -1.000000e+00, v2 ; 060404F3 V_MUL_F32_e32 v1, v2, v1 ; 10020302 V_SUB_F32_e32 v1, v8, v1 ; 08020308 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v1 ; 10040204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xb ; C202810B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s5 ; 7E060205 V_MAD_F32 v2, s4, v3, v2, 0, 0 ; D2820002 040A0604 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xf ; C202810F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s5, v2 ; 06040405 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x6 ; C2028106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s5, v1 ; 10060205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x2 ; C2028102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s5, v3, 0, 0 ; D2820003 040C0B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xa ; C202810A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s5 ; 7E080205 V_MAD_F32 v3, s4, v4, v3, 0, 0 ; D2820003 040E0804 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xe ; C202810E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s5, v3 ; 06060605 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x5 ; C2028105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s5, v1 ; 10080205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x1 ; C2028101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, v0, s5, v4, 0, 0 ; D2820004 04100B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x9 ; C2028109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s5 ; 7E0A0205 V_MAD_F32 v4, s4, v5, v4, 0, 0 ; D2820004 04120A04 S_BUFFER_LOAD_DWORD s5, s[0:3], 0xd ; C202810D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s5, v4 ; 06080805 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x4 ; C2028104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s5, v1 ; 10020205 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x0 ; C2028100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s5, v1, 0, 0 ; D2820000 04040B00 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x8 ; C2028108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s5 ; 7E020205 V_MAD_F32 v0, s4, v1, v0, 0, 0 ; D2820000 04020204 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1].wwww 7: MUL TEMP[1].xyz, TEMP[1], IN[1].xxxx 8: MOV TEMP[0].xyz, TEMP[1].xyzx 9: MUL TEMP[0].xyz, TEMP[0], CONST[0] 10: ABS TEMP[1].x, TEMP[0].xxxx 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: ABS TEMP[2].x, TEMP[0].yyyy 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[1].y, TEMP[2].xxxx 15: ABS TEMP[0].x, TEMP[0].zzzz 16: LG2 TEMP[0].x, TEMP[0].xxxx 17: MOV TEMP[1].z, TEMP[0].xxxx 18: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 19: EX2 TEMP[1].x, TEMP[0].xxxx 20: EX2 TEMP[2].x, TEMP[0].yyyy 21: MOV TEMP[1].y, TEMP[2].xxxx 22: EX2 TEMP[0].x, TEMP[0].zzzz 23: MOV TEMP[1].z, TEMP[0].xxxx 24: MOV TEMP[1].w, IMM[0].yyyy 25: MOV OUT[0], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %28 = load <8 x i32> addrspace(2)* %27, !tbaa !0 %29 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %30 = load <4 x i32> addrspace(2)* %29, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %31 to i32 %35 = bitcast float %32 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %28 to <32 x i8> %39 = bitcast <4 x i32> %30 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %42, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = fmul float %44, %33 %48 = fmul float %45, %33 %49 = fmul float %46, %33 %50 = fmul float %47, %24 %51 = fmul float %48, %25 %52 = fmul float %49, %26 %53 = call float @fabs(float %50) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %51) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %52) %58 = call float @llvm.log2.f32(float %57) %59 = fmul float %54, 0x3FDD1743E0000000 %60 = fmul float %56, 0x3FDD1743E0000000 %61 = fmul float %58, 0x3FDD1743E0000000 %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call float @llvm.AMDIL.exp.(float %60) %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call i32 @llvm.SI.packf16(float %62, float %63) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %64, float 1.000000e+00) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800700 00640202 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v5, v3 ; 7E0A4F03 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_INTERP_P1_F32 v6, v0, 0, 1, [m0] ; C8180400 V_INTERP_P2_F32 v6, [v6], v1, 0, 1, [m0] ; C8190401 V_MUL_F32_e32 v0, v5, v6 ; 10000D05 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 V_MOV_B32_e32 v1, 0x7fffffff ; 7E0202FF 7FFFFFFF V_AND_B32_e32 v0, v0, v1 ; 36000300 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_F32_e32 v0, 4.545450e-01, v0 ; 100000FF 3EE8BA1F V_EXP_F32_e32 v0, v0 ; 7E004B00 V_LOG_F32_e32 v5, v2 ; 7E0A4F02 V_MUL_LEGACY_F32_e32 v5, 2.200000e+00, v5 ; 0E0A0AFF 400CCCCD V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_MUL_F32_e32 v5, v5, v6 ; 100A0D05 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 V_AND_B32_e32 v5, v5, v1 ; 360A0305 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_F32_e32 v5, 4.545450e-01, v5 ; 100A0AFF 3EE8BA1F V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v0, v5, v0 ; 5E000105 V_LOG_F32_e32 v2, v4 ; 7E044F04 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MUL_F32_e32 v2, v2, v6 ; 10040D02 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x2 ; C2000102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s0, v2 ; 10040400 V_AND_B32_e32 v1, v2, v1 ; 36020302 V_LOG_F32_e32 v1, v1 ; 7E024F01 V_MUL_F32_e32 v1, 4.545450e-01, v1 ; 100202FF 3EE8BA1F V_EXP_F32_e32 v1, v1 ; 7E024B01 V_CVT_PKRTZ_F16_F32_e64 v1, v1, 1.000000e+00, 0, 0 ; D25E0001 0001E501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 0.0000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[10], IN[2].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[2].xxxx, CONST[9], TEMP[0] 3: MOV TEMP[0].xyz, TEMP[1].xyzx 4: MAD TEMP[1].xyz, IN[2].zzzz, CONST[11], TEMP[0] 5: MOV TEMP[0].xyz, TEMP[1].xyzx 6: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MOV TEMP[0].w, TEMP[1].xxxx 10: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 11: MOV TEMP[1].xyz, TEMP[1].xyzx 12: MUL TEMP[0], CONST[10], IN[0].yyyy 13: MAD TEMP[0], IN[0].xxxx, CONST[9], TEMP[0] 14: MAD TEMP[0], IN[0].zzzz, CONST[11], TEMP[0] 15: ADD TEMP[0], TEMP[0], CONST[12] 16: MUL TEMP[2], TEMP[0].yyyy, CONST[1] 17: MAD TEMP[2], TEMP[0].xxxx, CONST[0], TEMP[2] 18: MAD TEMP[2], TEMP[0].zzzz, CONST[2], TEMP[2] 19: MAD TEMP[2], TEMP[0].wwww, CONST[3], TEMP[2] 20: RCP TEMP[3].x, TEMP[2].wwww 21: MOV TEMP[0].w, TEMP[3].xxxx 22: MUL TEMP[3].xy, TEMP[3].xxxx, TEMP[2] 23: MOV TEMP[3].xy, TEMP[3].xyxx 24: MOV TEMP[4], TEMP[2] 25: ADD TEMP[5].xy, TEMP[0], CONST[8].zwzw 26: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 27: MOV TEMP[5].zw, TEMP[5].wwzw 28: MUL TEMP[6].xy, TEMP[0].yyyy, CONST[5] 29: MOV TEMP[2].xy, TEMP[6].xyxx 30: MAD TEMP[6].xy, TEMP[0].xxxx, CONST[4], TEMP[2] 31: MOV TEMP[2].xy, TEMP[6].xyxx 32: MAD TEMP[6].xy, TEMP[0].zzzz, CONST[6], TEMP[2] 33: MOV TEMP[2].xy, TEMP[6].xyxx 34: MOV TEMP[6].xyz, TEMP[0].xyzx 35: ADD TEMP[2].xy, TEMP[2], CONST[7] 36: MOV TEMP[0].xy, TEMP[2].xyxx 37: MAD TEMP[0].xy, TEMP[0], IMM[0].yzww, IMM[0].zzzz 38: MOV TEMP[0].xy, TEMP[0].xyxx 39: MOV TEMP[5].xy, IN[1].xyxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[3].zw, IMM[1].xxyx 42: MOV TEMP[0].zw, IMM[1].xxyx 43: MOV OUT[2], TEMP[5] 44: MOV OUT[3], TEMP[6] 45: MOV OUT[0], TEMP[4] 46: MOV OUT[1], TEMP[1] 47: MOV OUT[4], TEMP[3] 48: MOV OUT[5], TEMP[0] 49: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %45, %75 %78 = fmul float %46, %75 %79 = fmul float %47, %75 %80 = fmul float %74, %41 %81 = fadd float %80, %77 %82 = fmul float %74, %42 %83 = fadd float %82, %78 %84 = fmul float %74, %43 %85 = fadd float %84, %79 %86 = fmul float %76, %49 %87 = fadd float %86, %81 %88 = fmul float %76, %50 %89 = fadd float %88, %83 %90 = fmul float %76, %51 %91 = fadd float %90, %85 %92 = fmul float %87, %87 %93 = fmul float %89, %89 %94 = fadd float %93, %92 %95 = fmul float %91, %91 %96 = fadd float %94, %95 %97 = fcmp uge float %96, 0x3E7AD7F2A0000000 %98 = select i1 %97, float %96, float 0x3E7AD7F2A0000000 %99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98) %100 = fmul float %99, %87 %101 = fmul float %99, %89 %102 = fmul float %99, %91 %103 = fmul float %45, %62 %104 = fmul float %46, %62 %105 = fmul float %47, %62 %106 = fmul float %48, %62 %107 = fmul float %61, %41 %108 = fadd float %107, %103 %109 = fmul float %61, %42 %110 = fadd float %109, %104 %111 = fmul float %61, %43 %112 = fadd float %111, %105 %113 = fmul float %61, %44 %114 = fadd float %113, %106 %115 = fmul float %63, %49 %116 = fadd float %115, %108 %117 = fmul float %63, %50 %118 = fadd float %117, %110 %119 = fmul float %63, %51 %120 = fadd float %119, %112 %121 = fmul float %63, %52 %122 = fadd float %121, %114 %123 = fadd float %116, %53 %124 = fadd float %118, %54 %125 = fadd float %120, %55 %126 = fadd float %122, %56 %127 = fmul float %124, %17 %128 = fmul float %124, %18 %129 = fmul float %124, %19 %130 = fmul float %124, %20 %131 = fmul float %123, %13 %132 = fadd float %131, %127 %133 = fmul float %123, %14 %134 = fadd float %133, %128 %135 = fmul float %123, %15 %136 = fadd float %135, %129 %137 = fmul float %123, %16 %138 = fadd float %137, %130 %139 = fmul float %125, %21 %140 = fadd float %139, %132 %141 = fmul float %125, %22 %142 = fadd float %141, %134 %143 = fmul float %125, %23 %144 = fadd float %143, %136 %145 = fmul float %125, %24 %146 = fadd float %145, %138 %147 = fmul float %126, %25 %148 = fadd float %147, %140 %149 = fmul float %126, %26 %150 = fadd float %149, %142 %151 = fmul float %126, %27 %152 = fadd float %151, %144 %153 = fmul float %126, %28 %154 = fadd float %153, %146 %155 = fdiv float 1.000000e+00, %154 %156 = fmul float %155, %148 %157 = fmul float %155, %150 %158 = fadd float %123, %39 %159 = fadd float %124, %40 %160 = fmul float %158, %37 %161 = fmul float %159, %38 %162 = fmul float %124, %31 %163 = fmul float %124, %32 %164 = fmul float %123, %29 %165 = fadd float %164, %162 %166 = fmul float %123, %30 %167 = fadd float %166, %163 %168 = fmul float %125, %33 %169 = fadd float %168, %165 %170 = fmul float %125, %34 %171 = fadd float %170, %167 %172 = fadd float %169, %35 %173 = fadd float %171, %36 %174 = fmul float %172, 5.000000e-01 %175 = fadd float %174, -5.000000e-01 %176 = fmul float %173, -5.000000e-01 %177 = fadd float %176, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %100, float %101, float %102, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %68, float %69, float %160, float %161) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %123, float %124, float %125, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %156, float %157, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %175, float %177, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %148, float %150, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[4:7], s[2:3], 0x0 ; C0820300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s2, s[4:7], 0x29 ; C2010529 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s2, v2 ; 100A0402 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x25 ; C2000525 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s0 ; 7E0C0200 V_MAD_F32 v5, v1, v6, v5, 0, 0 ; D2820005 04160D01 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2d ; C200052D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s0 ; 7E0E0200 V_MAD_F32 v5, v3, v7, v5, 0, 0 ; D2820005 04160F03 S_BUFFER_LOAD_DWORD s3, s[4:7], 0x28 ; C2018528 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s3, v2 ; 10100403 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x24 ; C2000524 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s0 ; 7E120200 V_MAD_F32 v8, v1, v9, v8, 0, 0 ; D2820008 04221301 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2c ; C200052C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v10, s0 ; 7E140200 V_MAD_F32 v8, v3, v10, v8, 0, 0 ; D2820008 04221503 V_MUL_F32_e32 v11, v8, v8 ; 10161108 V_MAD_F32 v11, v5, v5, v11, 0, 0 ; D282000B 042E0B05 S_BUFFER_LOAD_DWORD s10, s[4:7], 0x2a ; C205052A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v12, s10, v2 ; 1018040A S_BUFFER_LOAD_DWORD s0, s[4:7], 0x26 ; C2000526 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v13, s0 ; 7E1A0200 V_MAD_F32 v12, v1, v13, v12, 0, 0 ; D282000C 04321B01 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2e ; C200052E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v14, s0 ; 7E1C0200 V_MAD_F32 v1, v3, v14, v12, 0, 0 ; D2820001 04321D03 V_MAD_F32 v2, v1, v1, v11, 0, 0 ; D2820002 042E0301 V_MOV_B32_e32 v3, 1.000000e-07 ; 7E0602FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v3, 0, -1, vcc, 0, 0, 0, 0 ; D2000003 01A98280 V_CNDMASK_B32_e64 v4, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000004 00018280 V_OR_B32_e32 v3, v3, v4 ; 38060903 V_MOV_B32_e32 v4, 0x33d6bf95 ; 7E0802FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_CNDMASK_B32_e64 v2, v4, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020504 V_RSQ_CLAMP_F32_e32 v2, v2 ; 7E045902 V_MUL_F32_e32 v1, v2, v1 ; 10020302 V_MUL_F32_e32 v3, v2, v5 ; 10060B02 V_MUL_F32_e32 v2, v2, v8 ; 10041102 V_MOV_B32_e32 v4, 0.000000e+00 ; 7E080280 EXP 15, 32, 0, 0, 0, v2, v3, v1, v4 ; F800020F 04010302 S_LOAD_DWORDX4 s[12:15], s[8:9], 0x0 ; C0860900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[15:18], s[12:15][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80030F00 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, s2, v16 ; 10022002 V_MAD_F32 v1, v15, v6, v1, 0, 0 ; D2820001 04060D0F V_MAD_F32 v1, v17, v7, v1, 0, 0 ; D2820001 04060F11 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x31 ; C2000531 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s0, v1 ; 06020200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x23 ; C2000523 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s0, v1 ; 06040200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x21 ; C2000521 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s0, v2 ; 10040400 V_MUL_F32_e32 v3, s3, v16 ; 10062003 V_MAD_F32 v3, v15, v9, v3, 0, 0 ; D2820003 040E130F V_MAD_F32 v3, v17, v10, v3, 0, 0 ; D2820003 040E1511 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x30 ; C2000530 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s0, v3 ; 06060600 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x22 ; C2000522 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s0, v3 ; 060A0600 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x20 ; C2000520 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s0, v5 ; 100A0A00 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[0:3][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000600 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v6, v7, v5, v2 ; F800021F 02050706 V_MUL_F32_e32 v0, s10, v16 ; 1000200A V_MAD_F32 v0, v15, v13, v0, 0, 0 ; D2820000 04021B0F V_MAD_F32 v0, v17, v14, v0, 0, 0 ; D2820000 04021D11 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x32 ; C2000532 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 EXP 15, 34, 0, 0, 0, v3, v1, v0, v2 ; F800022F 02000103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x5 ; C2000505 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v5, s0, v1 ; 100A0200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x1 ; C2000501 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v3, s0, v5, 0, 0 ; D2820005 04140103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x9 ; C2000509 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v0, s0, v5, 0, 0 ; D2820005 04140100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2b ; C200052B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s0, v16 ; 100C2000 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x27 ; C2000527 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s0 ; 7E0E0200 V_MAD_F32 v6, v15, v7, v6, 0, 0 ; D2820006 041A0F0F S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2f ; C200052F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s0 ; 7E0E0200 V_MAD_F32 v6, v17, v7, v6, 0, 0 ; D2820006 041A0F11 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x33 ; C2000533 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s0, v6 ; 060C0C00 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xd ; C200050D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v6, s0, v5, 0, 0 ; D2820005 04140106 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x7 ; C2000507 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s0, v1 ; 100E0200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x3 ; C2000503 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v3, s0, v7, 0, 0 ; D2820007 041C0103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xb ; C200050B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v0, s0, v7, 0, 0 ; D2820007 041C0100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xf ; C200050F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v6, s0, v7, 0, 0 ; D2820007 041C0106 V_RCP_F32_e32 v8, v7 ; 7E105507 V_MUL_F32_e32 v9, v8, v5 ; 10120B08 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x4 ; C2000504 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v10, s0, v1 ; 10140200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x0 ; C2000500 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v10, v3, s0, v10, 0, 0 ; D282000A 04280103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x8 ; C2000508 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v10, v0, s0, v10, 0, 0 ; D282000A 04280100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xc ; C200050C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v10, v6, s0, v10, 0, 0 ; D282000A 04280106 V_MUL_F32_e32 v8, v8, v10 ; 10101508 EXP 15, 35, 0, 0, 0, v8, v9, v4, v2 ; F800023F 02040908 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x15 ; C2000515 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v8, s0, v1 ; 10100200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x11 ; C2000511 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v8, v3, s0, v8, 0, 0 ; D2820008 04200103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x19 ; C2000519 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v8, v0, s0, v8, 0, 0 ; D2820008 04200100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x1d ; C200051D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v8, s0, v8 ; 06101000 V_MAD_F32 v8, v8, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820008 03C5E308 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x14 ; C2000514 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v9, s0, v1 ; 10120200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x10 ; C2000510 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, v3, s0, v9, 0, 0 ; D2820009 04240103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x18 ; C2000518 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, v0, s0, v9, 0, 0 ; D2820009 04240100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x1c ; C200051C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v9, s0, v9 ; 06121200 V_MAD_F32 v9, v9, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820009 03C5E109 EXP 15, 36, 0, 0, 0, v9, v8, v4, v2 ; F800024F 02040809 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x6 ; C2000506 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v1, s0, v1 ; 10020200 S_BUFFER_LOAD_DWORD s0, s[4:7], 0x2 ; C2000502 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v3, s0, v1, 0, 0 ; D2820001 04040103 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xa ; C200050A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s0, v1, 0, 0 ; D2820000 04040100 S_BUFFER_LOAD_DWORD s0, s[4:7], 0xe ; C200050E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v6, s0, v0, 0, 0 ; D2820000 04000106 EXP 15, 12, 0, 1, 0, v10, v5, v0, v7 ; F80008CF 0700050A S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xyz, TEMP[5].yzww 47: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %161 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %162 = call float @llvm.pow.f32(float %158, float 1.000000e+00) %163 = fmul float %102, %159 %164 = fmul float %103, %160 %165 = fmul float %104, %161 %166 = fadd float %163, %163 %167 = fadd float %164, %164 %168 = fadd float %165, %165 %169 = fsub float -0.000000e+00, %85 %170 = fadd float %24, %169 %171 = fsub float -0.000000e+00, %86 %172 = fadd float %25, %171 %173 = fsub float -0.000000e+00, %87 %174 = fadd float %26, %173 %175 = fmul float %170, %170 %176 = fmul float %172, %172 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = fcmp uge float %179, 0x3E7AD7F2A0000000 %181 = select i1 %180, float %179, float 0x3E7AD7F2A0000000 %182 = call float @llvm.AMDGPU.rsq.clamped.f32(float %181) %183 = fmul float %182, %170 %184 = fmul float %182, %172 %185 = fmul float %182, %174 %186 = fmul float %170, %182 %187 = fadd float %186, 0x3FAB15B580000000 %188 = fmul float %172, %182 %189 = fadd float %188, 0x3FEB126EA0000000 %190 = fmul float %174, %182 %191 = fadd float %190, 0x3FE0ED9160000000 %192 = fmul float %187, %187 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = fmul float %191, %191 %196 = fadd float %194, %195 %197 = fmul float 0.000000e+00, 0.000000e+00 %198 = fadd float %196, %197 %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) %200 = fmul float %187, %199 %201 = fmul float %189, %199 %202 = fmul float %191, %199 %203 = fmul float %200, %78 %204 = fmul float %201, %79 %205 = fadd float %204, %203 %206 = fmul float %202, %80 %207 = fadd float %205, %206 %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %209 = fmul float %78, %183 %210 = fmul float %79, %184 %211 = fadd float %210, %209 %212 = fmul float %80, %185 %213 = fadd float %211, %212 %214 = fadd float %213, %213 %215 = fsub float -0.000000e+00, %183 %216 = fmul float %214, %78 %217 = fadd float %216, %215 %218 = fsub float -0.000000e+00, %184 %219 = fmul float %214, %79 %220 = fadd float %219, %218 %221 = fsub float -0.000000e+00, %185 %222 = fmul float %214, %80 %223 = fadd float %222, %221 %224 = insertelement <4 x float> undef, float %217, i32 0 %225 = insertelement <4 x float> %224, float %220, i32 1 %226 = insertelement <4 x float> %225, float %223, i32 2 %227 = insertelement <4 x float> %226, float %162, i32 3 %228 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %227) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = call float @fabs(float %231) %234 = fdiv float 1.000000e+00, %233 %235 = fmul float %229, %234 %236 = fadd float %235, 1.500000e+00 %237 = fmul float %230, %234 %238 = fadd float %237, 1.500000e+00 %239 = bitcast float %238 to i32 %240 = bitcast float %236 to i32 %241 = bitcast float %232 to i32 %242 = insertelement <4 x i32> undef, i32 %239, i32 0 %243 = insertelement <4 x i32> %242, i32 %240, i32 1 %244 = insertelement <4 x i32> %243, i32 %241, i32 2 %245 = insertelement <4 x i32> %244, i32 undef, i32 3 %246 = bitcast <8 x i32> %55 to <32 x i8> %247 = bitcast <4 x i32> %57 to <16 x i8> %248 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %245, <32 x i8> %246, <16 x i8> %247, i32 4) %249 = extractelement <4 x float> %248, i32 0 %250 = extractelement <4 x float> %248, i32 1 %251 = extractelement <4 x float> %248, i32 2 %252 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %254 = call float @llvm.pow.f32(float %251, float 0x40019999A0000000) %255 = bitcast float %81 to i32 %256 = bitcast float %82 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = bitcast <8 x i32> %43 to <32 x i8> %260 = bitcast <4 x i32> %45 to <16 x i8> %261 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %259, <16 x i8> %260, i32 2) %262 = extractelement <4 x float> %261, i32 0 %263 = extractelement <4 x float> %261, i32 1 %264 = extractelement <4 x float> %261, i32 2 %265 = extractelement <4 x float> %261, i32 3 %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = call float @llvm.pow.f32(float %264, float 0x40019999A0000000) %269 = call float @llvm.pow.f32(float %265, float 1.000000e+00) %270 = call float @fabs(float %269) %271 = call float @llvm.pow.f32(float %270, float 0x40019999A0000000) %272 = insertelement <4 x float> undef, float %217, i32 0 %273 = insertelement <4 x float> %272, float %220, i32 1 %274 = insertelement <4 x float> %273, float %223, i32 2 %275 = insertelement <4 x float> %274, float %223, i32 3 %276 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %275) %277 = extractelement <4 x float> %276, i32 0 %278 = extractelement <4 x float> %276, i32 1 %279 = extractelement <4 x float> %276, i32 2 %280 = extractelement <4 x float> %276, i32 3 %281 = call float @fabs(float %279) %282 = fdiv float 1.000000e+00, %281 %283 = fmul float %277, %282 %284 = fadd float %283, 1.500000e+00 %285 = fmul float %278, %282 %286 = fadd float %285, 1.500000e+00 %287 = bitcast float %286 to i32 %288 = bitcast float %284 to i32 %289 = bitcast float %280 to i32 %290 = insertelement <4 x i32> undef, i32 %287, i32 0 %291 = insertelement <4 x i32> %290, i32 %288, i32 1 %292 = insertelement <4 x i32> %291, i32 %289, i32 2 %293 = insertelement <4 x i32> %292, i32 undef, i32 3 %294 = bitcast <8 x i32> %51 to <32 x i8> %295 = bitcast <4 x i32> %53 to <16 x i8> %296 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 4) %297 = extractelement <4 x float> %296, i32 0 %298 = extractelement <4 x float> %296, i32 1 %299 = extractelement <4 x float> %296, i32 2 %300 = call float @llvm.pow.f32(float %297, float 0x40019999A0000000) %301 = call float @llvm.pow.f32(float %298, float 0x40019999A0000000) %302 = call float @llvm.pow.f32(float %299, float 0x40019999A0000000) %303 = call float @llvm.AMDGPU.lrp(float %271, float %300, float %252) %304 = call float @llvm.AMDGPU.lrp(float %271, float %301, float %253) %305 = call float @llvm.AMDGPU.lrp(float %271, float %302, float %254) %306 = fmul float %271, 1.990000e+02 %307 = fadd float %306, 1.000000e+00 %308 = call float @fabs(float %208) %309 = call float @llvm.pow.f32(float %308, float %307) %310 = fmul float %307, 0x3FB99999A0000000 %311 = fmul float %310, %309 %312 = fadd float %311, %303 %313 = fmul float %310, %309 %314 = fadd float %313, %304 %315 = fmul float %310, %309 %316 = fadd float %315, %305 %317 = fmul float %312, %266 %318 = fadd float %317, %166 %319 = fmul float %314, %267 %320 = fadd float %319, %167 %321 = fmul float %316, %268 %322 = fadd float %321, %168 %323 = call float @fabs(float %318) %324 = call float @llvm.log2.f32(float %323) %325 = call float @fabs(float %320) %326 = call float @llvm.log2.f32(float %325) %327 = call float @fabs(float %322) %328 = call float @llvm.log2.f32(float %327) %329 = fmul float %324, 0x3FDD1743E0000000 %330 = fmul float %326, 0x3FDD1743E0000000 %331 = fmul float %328, 0x3FDD1743E0000000 %332 = call float @llvm.AMDIL.exp.(float %329) %333 = call float @llvm.AMDIL.exp.(float %330) %334 = call float @llvm.AMDIL.exp.(float %331) %335 = bitcast float %332 to i32 %336 = bitcast float %333 to i32 %337 = bitcast float %334 to i32 %338 = insertelement <4 x i32> undef, i32 %335, i32 0 %339 = insertelement <4 x i32> %338, i32 %336, i32 1 %340 = insertelement <4 x i32> %339, i32 %337, i32 2 %341 = insertelement <4 x i32> %340, i32 undef, i32 3 %342 = bitcast <8 x i32> %63 to <32 x i8> %343 = bitcast <4 x i32> %65 to <16 x i8> %344 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %341, <32 x i8> %342, <16 x i8> %343, i32 3) %345 = extractelement <4 x float> %344, i32 0 %346 = extractelement <4 x float> %344, i32 1 %347 = extractelement <4 x float> %344, i32 2 %348 = fmul float %89, 1.000000e+00 %349 = fadd float %348, 0.000000e+00 %350 = fmul float %90, -1.000000e+00 %351 = fadd float %350, 1.000000e+00 %352 = bitcast float %349 to i32 %353 = bitcast float %351 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %67 to <32 x i8> %357 = bitcast <4 x i32> %69 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = extractelement <4 x float> %358, i32 2 %361 = bitcast float %83 to i32 %362 = bitcast float %84 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %71 to <32 x i8> %366 = bitcast <4 x i32> %73 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = extractelement <4 x float> %367, i32 1 %370 = extractelement <4 x float> %367, i32 2 %371 = call float @llvm.AMDGPU.lrp(float %359, float %345, float %368) %372 = call float @llvm.AMDGPU.lrp(float %359, float %346, float %369) %373 = call float @llvm.AMDGPU.lrp(float %359, float %347, float %370) %374 = fsub float -0.000000e+00, %359 %375 = fmul float %360, %374 %376 = fadd float %375, %359 %377 = bitcast float %371 to i32 %378 = bitcast float %372 to i32 %379 = bitcast float %373 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = insertelement <4 x i32> %382, i32 undef, i32 3 %384 = bitcast <8 x i32> %75 to <32 x i8> %385 = bitcast <4 x i32> %77 to <16 x i8> %386 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 3) %387 = extractelement <4 x float> %386, i32 0 %388 = extractelement <4 x float> %386, i32 1 %389 = extractelement <4 x float> %386, i32 2 %390 = call float @llvm.AMDGPU.lrp(float %376, float %387, float %371) %391 = call float @llvm.AMDGPU.lrp(float %376, float %388, float %372) %392 = call float @llvm.AMDGPU.lrp(float %376, float %389, float %373) %393 = fsub float -0.000000e+00, %390 %394 = fadd float %393, %30 %395 = fsub float -0.000000e+00, %391 %396 = fadd float %395, %31 %397 = fsub float -0.000000e+00, %392 %398 = fadd float %397, %32 %399 = fmul float %37, %87 %400 = fsub float -0.000000e+00, %33 %401 = fadd float %85, %400 %402 = fsub float -0.000000e+00, %34 %403 = fadd float %86, %402 %404 = fsub float -0.000000e+00, %35 %405 = fadd float %399, %404 %406 = fmul float %405, %27 %407 = fmul float %406, 0x3FF7154CA0000000 %408 = call float @llvm.AMDIL.exp.(float %407) %409 = fsub float -0.000000e+00, %408 %410 = fadd float %409, 1.000000e+00 %411 = fmul float %401, %401 %412 = fmul float %403, %403 %413 = fadd float %412, %411 %414 = fmul float %405, %405 %415 = fadd float %413, %414 %416 = fdiv float 1.000000e+00, %405 %417 = fmul float %415, %28 %418 = fmul float %410, %417 %419 = fmul float %416, %418 %420 = fmul float %419, 0x3FF7154CA0000000 %421 = call float @llvm.AMDIL.exp.(float %420) %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = fsub float -0.000000e+00, %422 %424 = fadd float %423, 1.000000e+00 %425 = fmul float %88, 5.000000e-01 %426 = fadd float %425, 5.000000e-01 %427 = bitcast float %426 to i32 %428 = bitcast float %29 to i32 %429 = insertelement <2 x i32> undef, i32 %427, i32 0 %430 = insertelement <2 x i32> %429, i32 %428, i32 1 %431 = bitcast <8 x i32> %59 to <32 x i8> %432 = bitcast <4 x i32> %61 to <16 x i8> %433 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 0 %435 = fmul float %424, %434 %436 = fmul float %435, %394 %437 = fadd float %436, %390 %438 = fmul float %435, %396 %439 = fadd float %438, %391 %440 = fmul float %435, %398 %441 = fadd float %440, %392 %442 = fmul float %107, 5.000000e-01 %443 = fsub float -0.000000e+00, %36 %444 = fadd float %443, %87 %445 = fcmp oge float %444, 0.000000e+00 %446 = sext i1 %445 to i32 %447 = bitcast i32 %446 to float %448 = bitcast float %447 to i32 %449 = icmp ne i32 %448, 0 %. = select i1 %449, float %107, float %442 %450 = call i32 @llvm.SI.packf16(float %437, float %439) %451 = bitcast i32 %450 to float %452 = call i32 @llvm.SI.packf16(float %441, float %.) %453 = bitcast i32 %452 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %451, float %453, float %451, float %453) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v26, v0, 1, 1, [m0] ; C8680500 V_INTERP_P2_F32 v26, [v26], v1, 1, 1, [m0] ; C8690501 V_INTERP_P1_F32 v25, v0, 0, 1, [m0] ; C8640400 V_INTERP_P2_F32 v25, [v25], v1, 0, 1, [m0] ; C8650401 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800F00 00640219 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v6, v4 ; 7E0C4F04 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v17, v6 ; 7E224B06 V_LOG_F32_e32 v6, v3 ; 7E0C4F03 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v11, v6 ; 7E164B06 V_LOG_F32_e32 v6, v2 ; 7E0C4F02 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v10, v6 ; 7E144B06 V_LOG_F32_e32 v2, v5 ; 7E044F05 V_MUL_LEGACY_F32_e32 v2, 1.000000e+00, v2 ; 0E0404F2 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MOV_B32_e32 v3, 0x7fffffff ; 7E0602FF 7FFFFFFF V_AND_B32_e32 v2, v2, v3 ; 36040702 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_ADD_F32_e32 v3, -1.000000e-01, v2 ; 060604FF BDCCCCCD V_CMP_LT_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D0020000 00010103 V_CNDMASK_B32_e64 v3, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000003 00018280 V_OR_B32_e32 v3, v3, v3 ; 38060703 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_INTERP_P1_F32 v6, v0, 1, 4, [m0] ; C8181100 V_INTERP_P2_F32 v6, [v6], v1, 1, 4, [m0] ; C8191101 V_INTERP_P1_F32 v8, v0, 0, 4, [m0] ; C8201000 V_INTERP_P2_F32 v8, [v8], v1, 0, 4, [m0] ; C8211001 V_INTERP_P1_F32 v4, v0, 1, 3, [m0] ; C8100D00 V_INTERP_P2_F32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 V_INTERP_P1_F32 v3, v0, 2, 2, [m0] ; C80C0A00 V_INTERP_P2_F32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 V_INTERP_P1_F32 v5, v0, 1, 2, [m0] ; C8140900 V_INTERP_P2_F32 v5, [v5], v1, 1, 2, [m0] ; C8150901 V_INTERP_P1_F32 v7, v0, 0, 2, [m0] ; C81C0800 V_INTERP_P2_F32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 V_INTERP_P1_F32 v16, v0, 3, 1, [m0] ; C8400700 V_INTERP_P2_F32 v16, [v16], v1, 3, 1, [m0] ; C8410701 V_INTERP_P1_F32 v15, v0, 2, 1, [m0] ; C83C0600 V_INTERP_P2_F32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 V_INTERP_P1_F32 v29, v0, 2, 0, [m0] ; C8740200 V_INTERP_P2_F32 v29, [v29], v1, 2, 0, [m0] ; C8750201 V_INTERP_P1_F32 v28, v0, 1, 0, [m0] ; C8700100 V_INTERP_P2_F32 v28, [v28], v1, 1, 0, [m0] ; C8710101 V_INTERP_P1_F32 v27, v0, 0, 0, [m0] ; C86C0000 V_INTERP_P2_F32 v27, [v27], v1, 0, 0, [m0] ; C86D0001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x20 ; C2010920 S_BUFFER_LOAD_DWORD s3, s[8:11], 0x1c ; C201891C S_BUFFER_LOAD_DWORD s12, s[8:11], 0x1a ; C206091A S_BUFFER_LOAD_DWORD s13, s[8:11], 0x19 ; C2068919 S_BUFFER_LOAD_DWORD s14, s[8:11], 0x18 ; C2070918 S_BUFFER_LOAD_DWORD s15, s[8:11], 0x16 ; C2078916 S_BUFFER_LOAD_DWORD s16, s[8:11], 0x15 ; C2080915 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x14 ; C2088914 S_BUFFER_LOAD_DWORD s18, s[8:11], 0x13 ; C2090913 S_BUFFER_LOAD_DWORD s19, s[8:11], 0x11 ; C2098911 S_BUFFER_LOAD_DWORD s20, s[8:11], 0x10 ; C20A0910 S_BUFFER_LOAD_DWORD s21, s[8:11], 0xe ; C20A890E S_BUFFER_LOAD_DWORD s22, s[8:11], 0xd ; C20B090D S_BUFFER_LOAD_DWORD s8, s[8:11], 0xc ; C204090C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v18, s2 ; 7E240202 V_MOV_B32_e32 v0, s3 ; 7E000203 V_MOV_B32_e32 v19, s12 ; 7E26020C V_MOV_B32_e32 v23, s13 ; 7E2E020D V_MOV_B32_e32 v22, s14 ; 7E2C020E V_MOV_B32_e32 v1, s15 ; 7E02020F V_MOV_B32_e32 v24, s16 ; 7E300210 V_MOV_B32_e32 v12, s17 ; 7E180211 V_MOV_B32_e32 v14, s18 ; 7E1C0212 V_MOV_B32_e32 v21, s19 ; 7E2A0213 V_MOV_B32_e32 v20, s20 ; 7E280214 V_MOV_B32_e32 v31, s21 ; 7E3E0215 V_MOV_B32_e32 v33, s22 ; 7E420216 V_MOV_B32_e32 v32, s8 ; 7E400208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MOV_B32_e32 v30, 0.000000e+00 ; 7E3C0280 V_CUBESC_F32 v35, v27, v28, v29, 0, 0 ; D28A0023 0476391B V_CUBETC_F32 v34, v27, v28, v29, 0, 0 ; D28C0022 0476391B V_CUBEMA_F32 v36, v27, v28, v29, 0, 0 ; D28E0024 0476391B V_CUBEID_F32 v37, v27, v28, v29, 0, 0 ; D2880025 0476391B V_MOV_B32_e32 v42, 0x7fffffff ; 7E5402FF 7FFFFFFF V_AND_B32_e32 v42, v36, v42 ; 36545524 V_RCP_F32_e32 v42, v42 ; 7E54552A V_MOV_B32_e32 v43, 1.500000e+00 ; 7E5602FF 3FC00000 V_MAD_F32 v36, v34, v42, v43, 0, 0 ; D2820024 04AE5522 V_MAD_F32 v35, v35, v42, v43, 0, 0 ; D2820023 04AE5523 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[8:15], s[0:3] ; F0800F00 00022223 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v38, v37 ; 7E4C4F25 V_MUL_LEGACY_F32_e32 v38, 1.000000e+00, v38 ; 0E4C4CF2 V_EXP_F32_e32 v41, v38 ; 7E524B26 V_SUB_F32_e32 v33, v33, v5 ; 08420B21 V_SUB_F32_e32 v32, v32, v7 ; 08400F20 V_MUL_F32_e32 v42, v32, v32 ; 10544120 V_MAD_F32 v42, v33, v33, v42, 0, 0 ; D282002A 04AA4321 V_SUB_F32_e32 v31, v31, v3 ; 083E071F V_MAD_F32 v42, v31, v31, v42, 0, 0 ; D282002A 04AA3F1F V_MOV_B32_e32 v44, 1.000000e-07 ; 7E5802FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v42, v44 ; 7C0C592A V_CMP_U_F32_e64 s[0:1], v42, v42, 0, 0 ; D0100000 0002552A V_CNDMASK_B32_e64 v44, 0, -1, vcc, 0, 0, 0, 0 ; D200002C 01A98280 V_CNDMASK_B32_e64 v45, 0, -1, s[0:1], 0, 0, 0, 0 ; D200002D 00018280 V_OR_B32_e32 v44, v44, v45 ; 38585B2C V_MOV_B32_e32 v45, 0x33d6bf95 ; 7E5A02FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v44, 0, 0, 0 ; D10A0000 0001012C V_CNDMASK_B32_e64 v42, v45, v42, s[0:1], 0, 0, 0, 0 ; D200002A 0002552D V_RSQ_CLAMP_F32_e32 v42, v42 ; 7E54592A V_MUL_F32_e32 v44, v33, v42 ; 10585521 V_MUL_F32_e32 v45, v32, v42 ; 105A5520 V_MUL_F32_e32 v46, v27, v45 ; 105C5B1B V_MAD_F32 v46, v28, v44, v46, 0, 0 ; D282002E 04BA591C V_MUL_F32_e32 v47, v31, v42 ; 105E551F V_MAD_F32 v46, v29, v47, v46, 0, 0 ; D282002E 04BA5F1D V_ADD_F32_e32 v46, v46, v46 ; 065C5D2E V_MUL_F32_e32 v48, v46, v29 ; 10603B2E V_SUB_F32_e32 v40, v48, v47 ; 08505F30 V_MUL_F32_e32 v47, v46, v28 ; 105E392E V_SUB_F32_e32 v39, v47, v44 ; 084E592F V_MUL_F32_e32 v44, v46, v27 ; 1058372E V_SUB_F32_e32 v38, v44, v45 ; 084C5B2C V_CUBESC_F32 v45, v38, v39, v40, 0, 0 ; D28A002D 04A24F26 V_CUBETC_F32 v44, v38, v39, v40, 0, 0 ; D28C002C 04A24F26 V_CUBEMA_F32 v46, v38, v39, v40, 0, 0 ; D28E002E 04A24F26 V_CUBEID_F32 v47, v38, v39, v40, 0, 0 ; D288002F 04A24F26 V_MOV_B32_e32 v52, 0x7fffffff ; 7E6802FF 7FFFFFFF V_AND_B32_e32 v52, v46, v52 ; 3668692E V_RCP_F32_e32 v52, v52 ; 7E685534 V_MAD_F32 v46, v44, v52, v43, 0, 0 ; D282002E 04AE692C V_MAD_F32 v45, v45, v52, v43, 0, 0 ; D282002D 04AE692D S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x20 ; C0C40720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[44:46], 7, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[8:15], s[0:3] ; F0800700 00022C2D S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v47, v46 ; 7E5E4F2E V_MUL_LEGACY_F32_e32 v47, 2.200000e+00, v47 ; 0E5E5EFF 400CCCCD V_EXP_F32_e32 v47, v47 ; 7E5E4B2F S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[8:15], s[0:3] ; F0800F00 00023019 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v25, v51 ; 7E324F33 V_MUL_LEGACY_F32_e32 v25, 1.000000e+00, v25 ; 0E3232F2 V_EXP_F32_e32 v25, v25 ; 7E324B19 V_MOV_B32_e32 v26, 0x7fffffff ; 7E3402FF 7FFFFFFF V_AND_B32_e32 v25, v25, v26 ; 36323519 V_LOG_F32_e32 v25, v25 ; 7E324F19 V_MUL_LEGACY_F32_e32 v25, 2.200000e+00, v25 ; 0E3232FF 400CCCCD V_EXP_F32_e32 v25, v25 ; 7E324B19 V_SUB_F32_e32 v26, 1.000000e+00, v25 ; 083432F2 V_MUL_F32_e32 v47, v26, v47 ; 105E5F1A V_MOV_B32_e32 v41, v40 ; 7E520328 V_CUBESC_F32 v53, v38, v39, v40, 0, 0 ; D28A0035 04A24F26 V_CUBETC_F32 v52, v38, v39, v40, 0, 0 ; D28C0034 04A24F26 V_CUBEMA_F32 v54, v38, v39, v40, 0, 0 ; D28E0036 04A24F26 V_CUBEID_F32 v55, v38, v39, v40, 0, 0 ; D2880037 04A24F26 V_MOV_B32_e32 v38, 0x7fffffff ; 7E4C02FF 7FFFFFFF V_AND_B32_e32 v38, v54, v38 ; 364C4D36 V_RCP_F32_e32 v38, v38 ; 7E4C5526 V_MAD_F32 v54, v52, v38, v43, 0, 0 ; D2820036 04AE4D34 V_MAD_F32 v53, v53, v38, v43, 0, 0 ; D2820035 04AE4D35 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[8:15], s[0:3] ; F0800700 00022635 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v41, v40 ; 7E524F28 V_MUL_LEGACY_F32_e32 v41, 2.200000e+00, v41 ; 0E5252FF 400CCCCD V_EXP_F32_e32 v41, v41 ; 7E524B29 V_MAD_F32 v41, v25, v41, v47, 0, 0 ; D2820029 04BE5319 V_MOV_B32_e32 v43, 5.290000e-02 ; 7E5602FF 3D58ADAC V_MAD_F32 v32, v32, v42, v43, 0, 0 ; D2820020 04AE5520 V_MOV_B32_e32 v43, 8.460000e-01 ; 7E5602FF 3F589375 V_MAD_F32 v33, v33, v42, v43, 0, 0 ; D2820021 04AE5521 V_MUL_F32_e32 v43, v33, v33 ; 10564321 V_MAD_F32 v43, v32, v32, v43, 0, 0 ; D282002B 04AE4120 V_MOV_B32_e32 v47, 5.290000e-01 ; 7E5E02FF 3F076C8B V_MAD_F32 v31, v31, v42, v47, 0, 0 ; D282001F 04BE551F V_MAD_F32 v42, v31, v31, v43, 0, 0 ; D282002A 04AE3F1F V_RSQ_CLAMP_F32_e32 v42, v42 ; 7E54592A V_MUL_F32_e32 v33, v33, v42 ; 10425521 V_MUL_F32_e32 v32, v32, v42 ; 10405520 V_MUL_F32_e32 v32, v32, v27 ; 10403720 V_MAD_F32 v32, v33, v28, v32, 0, 0 ; D2820020 04823921 V_MUL_F32_e32 v31, v31, v42 ; 103E551F V_MAD_F32 v27, v31, v29, v32, 0, 0 ; D282001B 04823B1F V_ADD_F32_e64 v27, v27, 0, 1, 0 ; D206081B 0001011B V_MOV_B32_e32 v28, 0x7fffffff ; 7E3802FF 7FFFFFFF V_AND_B32_e32 v27, v27, v28 ; 3636391B V_LOG_F32_e32 v27, v27 ; 7E364F1B V_MOV_B32_e32 v28, 1.990000e+02 ; 7E3802FF 43470000 V_MAD_F32 v28, v25, v28, 1.000000e+00, 0, 0 ; D282001C 03CA3919 V_MUL_LEGACY_F32_e32 v27, v28, v27 ; 0E36371C V_EXP_F32_e32 v27, v27 ; 7E364B1B V_MUL_F32_e32 v28, 1.000000e-01, v28 ; 103838FF 3DCCCCCD V_MAD_F32 v29, v28, v27, v41, 0, 0 ; D282001D 04A6371C V_LOG_F32_e32 v30, v36 ; 7E3C4F24 V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MUL_F32_e32 v31, v17, v30 ; 103E3D11 V_MAD_F32 v17, v17, v30, v31, 0, 0 ; D2820011 047E3D11 V_LOG_F32_e32 v30, v50 ; 7E3C4F32 V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MAD_F32 v17, v29, v30, v17, 0, 0 ; D2820011 04463D1D V_MOV_B32_e32 v29, 0x7fffffff ; 7E3A02FF 7FFFFFFF V_AND_B32_e32 v17, v17, v29 ; 36223B11 V_LOG_F32_e32 v17, v17 ; 7E224F11 V_MUL_F32_e32 v17, 4.545450e-01, v17 ; 102222FF 3EE8BA1F V_EXP_F32_e32 v31, v17 ; 7E3E4B11 V_LOG_F32_e32 v17, v45 ; 7E224F2D V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v17, v26, v17 ; 1022231A V_LOG_F32_e32 v33, v39 ; 7E424F27 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MAD_F32 v17, v25, v33, v17, 0, 0 ; D2820011 04464319 V_MAD_F32 v17, v28, v27, v17, 0, 0 ; D2820011 0446371C V_LOG_F32_e32 v33, v35 ; 7E424F23 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MUL_F32_e32 v41, v11, v33 ; 1052430B V_MAD_F32 v11, v11, v33, v41, 0, 0 ; D282000B 04A6430B V_LOG_F32_e32 v33, v49 ; 7E424F31 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MAD_F32 v11, v17, v33, v11, 0, 0 ; D282000B 042E4311 V_MOV_B32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF V_AND_B32_e32 v11, v11, v17 ; 3616230B V_LOG_F32_e32 v11, v11 ; 7E164F0B V_MUL_F32_e32 v11, 4.545450e-01, v11 ; 101616FF 3EE8BA1F V_EXP_F32_e32 v30, v11 ; 7E3C4B0B V_LOG_F32_e32 v11, v44 ; 7E164F2C V_MUL_LEGACY_F32_e32 v11, 2.200000e+00, v11 ; 0E1616FF 400CCCCD V_EXP_F32_e32 v11, v11 ; 7E164B0B V_MUL_F32_e32 v11, v26, v11 ; 1016171A V_LOG_F32_e32 v17, v38 ; 7E224F26 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MAD_F32 v11, v25, v17, v11, 0, 0 ; D282000B 042E2319 V_MAD_F32 v11, v28, v27, v11, 0, 0 ; D282000B 042E371C V_LOG_F32_e32 v17, v34 ; 7E224F22 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v25, v10, v17 ; 1032230A V_MAD_F32 v10, v10, v17, v25, 0, 0 ; D282000A 0466230A V_LOG_F32_e32 v17, v48 ; 7E224F30 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MAD_F32 v10, v11, v17, v10, 0, 0 ; D282000A 042A230B V_MOV_B32_e32 v11, 0x7fffffff ; 7E1602FF 7FFFFFFF V_AND_B32_e32 v10, v10, v11 ; 3614170A V_LOG_F32_e32 v10, v10 ; 7E144F0A V_MUL_F32_e32 v10, 4.545450e-01, v10 ; 101414FF 3EE8BA1F V_EXP_F32_e32 v29, v10 ; 7E3A4B0A S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x30 ; C0C40730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[8:15], s[0:3] ; F0800700 0002191D S_LOAD_DWORDX4 s[0:3], s[4:5], 0x20 ; C0800520 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x40 ; C0C40740 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[8:15], s[0:3] ; F0800700 00020F0F V_SUB_F32_e32 v9, 1.000000e+00, v6 ; 08120CF2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x38 ; C0C40738 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[8:9], 5, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800500 00020808 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v6, 1.000000e+00, v8 ; 080C10F2 V_MUL_F32_e32 v10, v6, v17 ; 10142306 V_MAD_F32 v30, v8, v27, v10, 0, 0 ; D282001E 042A3708 V_MUL_F32_e32 v10, v6, v16 ; 10142106 V_MAD_F32 v29, v8, v26, v10, 0, 0 ; D282001D 042A3508 V_MUL_F32_e32 v6, v6, v15 ; 100C1F06 V_MAD_F32 v28, v8, v25, v6, 0, 0 ; D282001C 041A3308 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x24 ; C0800524 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x48 ; C0C40748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[0:3] ; F0800700 00020F1C V_MUL_F32_e32 v6, v9, v8 ; 100C1109 V_SUB_F32_e32 v6, v8, v6 ; 080C0D08 V_SUB_F32_e32 v8, 1.000000e+00, v6 ; 08100CF2 V_MUL_F32_e32 v9, v8, v29 ; 10123B08 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v9, v6, v16, v9, 0, 0 ; D2820009 04262106 V_SUB_F32_e32 v10, v24, v9 ; 08141318 V_SUB_F32_e32 v5, v5, v23 ; 080A2F05 V_SUB_F32_e32 v7, v7, v22 ; 080E2D07 V_MUL_F32_e32 v7, v7, v7 ; 100E0F07 V_MAD_F32 v5, v5, v5, v7, 0, 0 ; D2820005 041E0B05 V_MUL_F32_e32 v7, v18, v3 ; 100E0712 V_SUB_F32_e32 v7, v7, v19 ; 080E2707 V_MAD_F32 v5, v7, v7, v5, 0, 0 ; D2820005 04160F07 V_MUL_F32_e32 v5, v5, v21 ; 100A2B05 V_MUL_F32_e32 v11, v7, v20 ; 10162907 V_MUL_F32_e32 v11, 1.442700e+00, v11 ; 101616FF 3FB8AA65 V_EXP_F32_e32 v11, v11 ; 7E164B0B V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_MUL_F32_e32 v5, v11, v5 ; 100A0B0B V_RCP_F32_e32 v7, v7 ; 7E0E5507 V_MUL_F32_e32 v5, v7, v5 ; 100A0B07 V_MUL_F32_e32 v5, 1.442700e+00, v5 ; 100A0AFF 3FB8AA65 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_ADD_F32_e64 v5, v5, 0, 1, 0 ; D2060805 00010105 V_SUB_F32_e32 v5, 1.000000e+00, v5 ; 080A0AF2 V_MAD_F32 v13, v4, 5.000000e-01, 5.000000e-01, 0, 0 ; D282000D 03C1E104 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x28 ; C0C20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v4, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800100 0001040D S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v5, v4 ; 10080905 V_MAD_F32 v5, v4, v10, v9, 0, 0 ; D2820005 04261504 V_MUL_F32_e32 v7, v8, v28 ; 100E3908 V_MAD_F32 v7, v6, v15, v7, 0, 0 ; D2820007 041E1F06 V_SUB_F32_e32 v9, v12, v7 ; 08120F0C V_MAD_F32 v7, v4, v9, v7, 0, 0 ; D2820007 041E1304 V_CVT_PKRTZ_F16_F32_e32 v5, v7, v5 ; 5E0A0B07 V_MUL_F32_e32 v7, v8, v30 ; 100E3D08 V_MAD_F32 v6, v6, v17, v7, 0, 0 ; D2820006 041E2306 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_MAD_F32 v1, v4, v1, v6, 0, 0 ; D2820001 041A0304 V_SUB_F32_e32 v0, v3, v0 ; 08000103 V_CMP_GE_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D00C0000 00010100 V_MUL_F32_e32 v0, 5.000000e-01, v2 ; 100004F0 V_CNDMASK_B32_e64 v0, v0, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020500 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 EXP 15, 0, 1, 1, 1, v5, v0, v5, v0 ; F8001C0F 00050005 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..98] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+9] 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xyz, IN[0], CONST[ADDR[0].x+9].wwww, TEMP[1] 6: MOV TEMP[2].xyz, TEMP[1].xyzx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[0].xyz, CONST[ADDR[0].x+9].xyzx 9: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 10: MAD TEMP[3], TEMP[1].xxxx, CONST[0], TEMP[3] 11: MAD TEMP[3], TEMP[1].zzzz, CONST[2], TEMP[3] 12: ADD TEMP[3], TEMP[3], CONST[3] 13: RCP TEMP[4].x, TEMP[3].wwww 14: MOV TEMP[2].w, TEMP[4].xxxx 15: MUL TEMP[4].xy, TEMP[4].xxxx, TEMP[3] 16: MOV TEMP[4].xy, TEMP[4].xyxx 17: MOV TEMP[3], TEMP[3] 18: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 19: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 20: MOV TEMP[5].zw, TEMP[5].wwzw 21: MUL TEMP[6].yw, TEMP[1].yyyy, CONST[5].xxzy 22: MOV TEMP[2].yw, TEMP[6].wyww 23: MAD TEMP[6].xy, TEMP[1].xxxx, CONST[4], TEMP[2].ywzw 24: MOV TEMP[2].xy, TEMP[6].xyxx 25: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[6], TEMP[2] 26: MOV TEMP[2].xy, TEMP[1].xyxx 27: ADD TEMP[1].xy, TEMP[2], CONST[7] 28: MOV TEMP[2].xy, TEMP[1].xyxx 29: MAD TEMP[1].xy, TEMP[2], IMM[0].xyzz, IMM[0].yyyy 30: MOV TEMP[1].xy, TEMP[1].xyxx 31: MOV TEMP[5].xy, IN[1].xyxx 32: MOV TEMP[0].w, IMM[0].wwww 33: MOV TEMP[1].zw, IMM[0].wwzw 34: MOV TEMP[4].zw, IMM[0].wwzw 35: MOV OUT[1], TEMP[5] 36: MOV OUT[0], TEMP[3] 37: MOV OUT[2], TEMP[0] 38: MOV OUT[3], TEMP[1] 39: MOV OUT[4], TEMP[4] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = fptosi float %58 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 144 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 148 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %61, 4 %69 = add i32 %68, 152 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = bitcast float %60 to i32 %72 = shl i32 %71, 4 %73 = add i32 %72, 156 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = fmul float %45, %74 %76 = fadd float %75, %64 %77 = shl i32 %71, 4 %78 = add i32 %77, 156 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %46, %79 %81 = fadd float %80, %67 %82 = shl i32 %71, 4 %83 = add i32 %82, 156 %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %83) %85 = fmul float %47, %84 %86 = fadd float %85, %70 %87 = bitcast float %60 to i32 %88 = shl i32 %87, 4 %89 = add i32 %88, 144 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = shl i32 %87, 4 %92 = add i32 %91, 148 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %87, 4 %95 = add i32 %94, 152 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %81, %17 %98 = fmul float %81, %18 %99 = fmul float %81, %19 %100 = fmul float %81, %20 %101 = fmul float %76, %13 %102 = fadd float %101, %97 %103 = fmul float %76, %14 %104 = fadd float %103, %98 %105 = fmul float %76, %15 %106 = fadd float %105, %99 %107 = fmul float %76, %16 %108 = fadd float %107, %100 %109 = fmul float %86, %21 %110 = fadd float %109, %102 %111 = fmul float %86, %22 %112 = fadd float %111, %104 %113 = fmul float %86, %23 %114 = fadd float %113, %106 %115 = fmul float %86, %24 %116 = fadd float %115, %108 %117 = fadd float %110, %25 %118 = fadd float %112, %26 %119 = fadd float %114, %27 %120 = fadd float %116, %28 %121 = fdiv float 1.000000e+00, %120 %122 = fmul float %121, %117 %123 = fmul float %121, %118 %124 = fadd float %76, %39 %125 = fadd float %81, %40 %126 = fmul float %124, %37 %127 = fmul float %125, %38 %128 = fmul float %81, %31 %129 = fmul float %81, %32 %130 = fmul float %76, %29 %131 = fadd float %130, %128 %132 = fmul float %76, %30 %133 = fadd float %132, %129 %134 = fmul float %86, %33 %135 = fadd float %134, %131 %136 = fmul float %86, %34 %137 = fadd float %136, %133 %138 = fadd float %135, %35 %139 = fadd float %137, %36 %140 = fmul float %138, 5.000000e-01 %141 = fadd float %140, -5.000000e-01 %142 = fmul float %139, -5.000000e-01 %143 = fadd float %142, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float %126, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %93, float %96, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %141, float %143, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %122, float %123, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %118, float %119, float %120) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v2, 0x94, v1 ; 4A0402FF 00000094 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_ADD_I32_e32 v3, 0x9c, v1 ; 4A0602FF 0000009C BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[4:7], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010400 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v8, v5, v3, v2, 0, 0 ; D2820008 040A0705 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v9, s4, v8 ; 06121004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v9, s4, v9 ; 10121204 V_ADD_I32_e32 v10, 0x90, v1 ; 4A1402FF 00000090 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v11, v4, v3, v10, 0, 0 ; D282000B 042A0704 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v12, s4, v11 ; 06181604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x20 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v12, s4, v12 ; 10181804 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[13:16], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010D00 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v13, v14, v12, v9 ; F800020F 090C0E0D V_ADD_I32_e32 v0, 0x98, v1 ; 4A0002FF 00000098 BUFFER_LOAD_DWORD v0, s[0:3] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000000 V_MOV_B32_e32 v1, 1.000000e+00 ; 7E0202F2 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 EXP 15, 33, 0, 0, 0, v10, v2, v0, v1 ; F800021F 0100020A S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v2, s4, v8 ; 10041004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x11 ; C2020111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v11, s4, v2, 0, 0 ; D2820002 0408090B V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1d ; C202011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MAD_F32 v2, v2, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820002 03C5E302 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v8 ; 10061004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v11, s4, v3, 0, 0 ; D2820003 040C090B S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1c ; C202011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MAD_F32 v3, v3, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820003 03C5E103 V_MOV_B32_e32 v4, 0.000000e+00 ; 7E080280 EXP 15, 34, 0, 0, 0, v3, v2, v4, v1 ; F800022F 01040203 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v2, s4, v8 ; 10041004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v11, s4, v2, 0, 0 ; D2820002 0408090B S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, v0, s4, v2, 0, 0 ; D2820002 04080900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s4, v2 ; 06040404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v8 ; 10061004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v11, s4, v3, 0, 0 ; D2820003 040C090B S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v0, s4, v3, 0, 0 ; D2820003 040C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_RCP_F32_e32 v5, v3 ; 7E0A5503 V_MUL_F32_e32 v6, v5, v2 ; 100C0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v8 ; 100E1004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v11, s4, v7, 0, 0 ; D2820007 041C090B S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v0, s4, v7, 0, 0 ; D2820007 041C0900 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xc ; C202010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s4, v7 ; 060E0E04 V_MUL_F32_e32 v5, v5, v7 ; 100A0F05 EXP 15, 35, 0, 0, 0, v5, v6, v4, v1 ; F800023F 01040605 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v1, s4, v8 ; 10021004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v11, s4, v1, 0, 0 ; D2820001 0404090B S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s4, v1, 0, 0 ; D2820000 04040900 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xe ; C200010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 EXP 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL IN[4], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[14] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..10], LOCAL IMM[0] FLT32 { -0.1000, 0.0000, -1.0000, 1.0000} IMM[1] FLT32 { 0.0010, -0.1471, -0.2889, 0.4360} IMM[2] FLT32 { 0.6150, -0.5150, -0.1000, 0.5000} IMM[3] FLT32 { 1.0000, 1.1398, -0.3947, -0.5806} IMM[4] FLT32 { 1.0000, 2.0321, 1.4427, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy 2: MAD TEMP[1].xyz, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xy, IN[1].xyyy 4: TEX TEMP[2], TEMP[2], SAMP[0], 2D 5: MOV TEMP[3].xw, TEMP[2] 6: ADD TEMP[4].x, TEMP[2].wwww, IMM[0].xxxx 7: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy 8: UIF TEMP[5].xxxx :2 9: MOV TEMP[5].x, IMM[0].yyyy 10: ELSE :2 11: MOV TEMP[5].x, IMM[0].zzzz 12: ENDIF 13: MOV TEMP[5].x, TEMP[5].xxxx 14: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 15: UIF TEMP[6].xxxx :2 16: MOV TEMP[6].x, IMM[0].yyyy 17: ELSE :2 18: MOV TEMP[6].x, IMM[0].zzzz 19: ENDIF 20: MOV TEMP[5].y, TEMP[6].xxxx 21: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 22: UIF TEMP[6].xxxx :2 23: MOV TEMP[6].x, IMM[0].yyyy 24: ELSE :2 25: MOV TEMP[6].x, IMM[0].zzzz 26: ENDIF 27: MOV TEMP[5].z, TEMP[6].xxxx 28: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 29: UIF TEMP[6].xxxx :2 30: ELSE :2 31: ENDIF 32: FSLT TEMP[5].xyz, TEMP[5].xyzz, IMM[0].yyyy 33: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 34: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 35: UIF TEMP[6].xxxx :2 36: KILL 37: ENDIF 38: MAD TEMP[5].xy, IN[3], IMM[0].wzww, IMM[0].ywyy 39: MOV TEMP[5].xy, TEMP[5].xyyy 40: TEX TEMP[5], TEMP[5], SAMP[5], 2D 41: MOV TEMP[4].z, TEMP[5] 42: ABS TEMP[6].x, TEMP[5] 43: MOV TEMP[6], -TEMP[6].xxxx 44: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].yyyy 45: UIF TEMP[7].xxxx :2 46: MOV TEMP[7].x, IMM[0].zzzz 47: ELSE :2 48: MOV TEMP[7].x, IMM[0].yyyy 49: ENDIF 50: MOV TEMP[7].x, TEMP[7].xxxx 51: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].yyyy 52: UIF TEMP[8].xxxx :2 53: MOV TEMP[8].x, IMM[0].zzzz 54: ELSE :2 55: MOV TEMP[8].x, IMM[0].yyyy 56: ENDIF 57: MOV TEMP[7].y, TEMP[8].xxxx 58: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].yyyy 59: UIF TEMP[8].xxxx :2 60: MOV TEMP[8].x, IMM[0].zzzz 61: ELSE :2 62: MOV TEMP[8].x, IMM[0].yyyy 63: ENDIF 64: MOV TEMP[7].z, TEMP[8].xxxx 65: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].yyyy 66: UIF TEMP[6].xxxx :2 67: MOV TEMP[6].x, IMM[0].zzzz 68: ELSE :2 69: MOV TEMP[6].x, IMM[0].yyyy 70: ENDIF 71: MOV TEMP[7].w, TEMP[6].xxxx 72: MOV TEMP[6].z, TEMP[7] 73: FSLT TEMP[7].xyz, TEMP[7].xyzz, IMM[0].yyyy 74: OR TEMP[8].x, TEMP[7].xxxx, TEMP[7].zzzz 75: OR TEMP[8].x, TEMP[8].xxxx, TEMP[7].yyyy 76: UIF TEMP[8].xxxx :2 77: KILL 78: ENDIF 79: RCP TEMP[6].x, CONST[0].xxxx 80: RCP TEMP[7].x, CONST[0].yyyy 81: MOV TEMP[6].y, TEMP[7].xxxx 82: MUL TEMP[1].yw, TEMP[6].xxzy, TEMP[1].xxzy 83: MOV TEMP[4].yw, TEMP[1].wyww 84: MUL TEMP[1].xy, IMM[1].xxxx, IN[2] 85: MOV TEMP[1].xy, TEMP[1].xyyy 86: TEX TEMP[1], TEMP[1], SAMP[1], 2D 87: MOV TEMP[6].xw, TEMP[1].xxxw 88: MAD TEMP[7].xy, TEMP[4].ywzw, IMM[0].wzww, IMM[0].ywyy 89: MOV TEMP[7].xy, TEMP[7].xyyy 90: TEX TEMP[7], TEMP[7], SAMP[2], 2D 91: MOV TEMP[8].w, TEMP[7].xyxw 92: DP3 TEMP[9].x, IMM[1].yzww, TEMP[1].xyzz 93: MOV TEMP[9].y, TEMP[9].xxxx 94: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[1].xyzz 95: MOV TEMP[9].z, TEMP[10].xxxx 96: DP3 TEMP[10].x, IMM[1].yzww, TEMP[2].xyzz 97: MOV TEMP[6].y, TEMP[10].xxxx 98: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[2].xyzz 99: MOV TEMP[6].z, TEMP[10].xxxx 100: LRP TEMP[1].yz, TEMP[1].wwww, TEMP[9], TEMP[6] 101: MOV TEMP[3].yz, TEMP[1].zyzz 102: DP2 TEMP[6].x, IMM[3].xyyy, TEMP[3].xzzz 103: DP3 TEMP[1].x, IMM[3].xzww, TEMP[3].xyzz 104: MOV TEMP[6].y, TEMP[1].xxxx 105: DP2 TEMP[1].x, IMM[4].xyyy, TEMP[3].xyyy 106: MOV TEMP[6].z, TEMP[1].xxxx 107: MUL TEMP[1].xyz, TEMP[7], CONST[1].xxxx 108: MOV TEMP[3].xyz, TEMP[1].xyzx 109: MAD TEMP[1].y, TEMP[7].wwww, -CONST[1].xxxx, -IMM[0].zzzz 110: MOV TEMP[7].xyz, TEMP[6].xyzz 111: TEX TEMP[7], TEMP[7], SAMP[4], 3D 112: MAD TEMP[1].xyz, TEMP[7], TEMP[1].yyyy, TEMP[3] 113: MOV TEMP[3].xyz, TEMP[1].xyzx 114: MOV TEMP[1].xy, IN[1].zwww 115: TEX TEMP[1], TEMP[1], SAMP[6], 2D 116: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[3], TEMP[1] 117: MOV TEMP[8].xyz, TEMP[1].xyzx 118: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 119: MOV TEMP[1].xyz, TEMP[1].xyzz 120: TEX TEMP[1], TEMP[1], SAMP[7], 3D 121: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[1], TEMP[8] 122: MOV TEMP[4].xyz, TEMP[1].xyzx 123: MAD TEMP[1].x, IN[4].yyyy, IMM[2].wwww, IMM[2].wwww 124: MOV TEMP[3].x, TEMP[1].xxxx 125: MOV TEMP[3].y, CONST[4].wwww 126: MOV TEMP[1].xy, TEMP[3].xyyy 127: TEX TEMP[1].xw, TEMP[1], SAMP[3], 2D 128: MOV TEMP[6].w, TEMP[1].wwww 129: ADD TEMP[5].xyz, -CONST[2], IN[2] 130: MOV TEMP[3].z, TEMP[5].xyzx 131: DP3 TEMP[3].x, TEMP[5].xyzz, TEMP[5].xyzz 132: MUL TEMP[7].xy, TEMP[3].xzzw, CONST[4].yxzw 133: MUL TEMP[8].y, TEMP[7].yyyy, IMM[4].zzzz 134: EX2 TEMP[8].x, TEMP[8].yyyy 135: ADD TEMP[8].y, -TEMP[8].xxxx, IMM[0].wwww 136: MUL TEMP[7].x, TEMP[8].yyyy, TEMP[7].xxxx 137: RCP TEMP[5].x, TEMP[5].zzzz 138: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 139: MUL TEMP[5].x, TEMP[5].xxxx, IMM[4].zzzz 140: MOV TEMP[3].x, TEMP[5].xxxx 141: EX2 TEMP[5].x, TEMP[5].xxxx 142: MOV_SAT TEMP[3].x, TEMP[5].xxxx 143: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].wwww 144: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 145: ADD TEMP[3].xyz, -TEMP[4], CONST[3] 146: MOV TEMP[6].xyz, TEMP[3].xyzx 147: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[6], TEMP[4] 148: MOV TEMP[1].xyz, TEMP[1].xyzx 149: MOV TEMP[1].w, TEMP[2].wwww 150: MOV OUT[0], TEMP[1] 151: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %84 = fmul float %15, %40 %85 = fadd float %84, %41 %86 = fmul float %14, %36 %87 = fadd float %86, %38 %88 = fmul float %85, %37 %89 = fadd float %88, %39 %90 = bitcast float %74 to i32 %91 = bitcast float %75 to i32 %92 = insertelement <2 x i32> undef, i32 %90, i32 0 %93 = insertelement <2 x i32> %92, i32 %91, i32 1 %94 = bitcast <8 x i32> %43 to <32 x i8> %95 = bitcast <4 x i32> %45 to <16 x i8> %96 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %94, <16 x i8> %95, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = extractelement <4 x float> %96, i32 3 %101 = fadd float %100, 0xBFB99999A0000000 %102 = fcmp oge float %101, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float 0.000000e+00, float -1.000000e+00 %107 = fcmp oge float %101, 0.000000e+00 %108 = sext i1 %107 to i32 %109 = bitcast i32 %108 to float %110 = bitcast float %109 to i32 %111 = icmp ne i32 %110, 0 %temp24.0 = select i1 %111, float 0.000000e+00, float -1.000000e+00 %112 = fcmp oge float %101, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = bitcast i32 %113 to float %115 = bitcast float %114 to i32 %116 = icmp ne i32 %115, 0 %.71 = select i1 %116, float 0.000000e+00, float -1.000000e+00 %117 = fcmp oge float %101, 0.000000e+00 %118 = sext i1 %117 to i32 %119 = bitcast i32 %118 to float %120 = bitcast float %119 to i32 %121 = icmp ne i32 %120, 0 %122 = fcmp olt float %., 0.000000e+00 %123 = sext i1 %122 to i32 %124 = fcmp olt float %temp24.0, 0.000000e+00 %125 = sext i1 %124 to i32 %126 = fcmp olt float %.71, 0.000000e+00 %127 = sext i1 %126 to i32 %128 = bitcast i32 %123 to float %129 = bitcast i32 %125 to float %130 = bitcast i32 %127 to float %131 = bitcast float %128 to i32 %132 = bitcast float %130 to i32 %133 = or i32 %131, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = bitcast float %129 to i32 %137 = or i32 %135, %136 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = icmp ne i32 %139, 0 br i1 %140, label %IF54, label %ENDIF53 IF54: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF53 ENDIF53: ; preds = %main_body, %IF54 %141 = fmul float %81, 1.000000e+00 %142 = fadd float %141, 0.000000e+00 %143 = fmul float %82, -1.000000e+00 %144 = fadd float %143, 1.000000e+00 %145 = bitcast float %142 to i32 %146 = bitcast float %144 to i32 %147 = insertelement <2 x i32> undef, i32 %145, i32 0 %148 = insertelement <2 x i32> %147, i32 %146, i32 1 %149 = bitcast <8 x i32> %63 to <32 x i8> %150 = bitcast <4 x i32> %65 to <16 x i8> %151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %148, <32 x i8> %149, <16 x i8> %150, i32 2) %152 = extractelement <4 x float> %151, i32 0 %153 = extractelement <4 x float> %151, i32 2 %154 = call float @fabs(float %152) %155 = fsub float -0.000000e+00, %154 %156 = fsub float -0.000000e+00, %154 %157 = fsub float -0.000000e+00, %154 %158 = fsub float -0.000000e+00, %154 %159 = fcmp oge float %155, 0.000000e+00 %160 = sext i1 %159 to i32 %161 = bitcast i32 %160 to float %162 = bitcast float %161 to i32 %163 = icmp ne i32 %162, 0 %.72 = select i1 %163, float -1.000000e+00, float 0.000000e+00 %164 = fcmp oge float %156, 0.000000e+00 %165 = sext i1 %164 to i32 %166 = bitcast i32 %165 to float %167 = bitcast float %166 to i32 %168 = icmp ne i32 %167, 0 %temp32.0 = select i1 %168, float -1.000000e+00, float 0.000000e+00 %169 = fcmp oge float %157, 0.000000e+00 %170 = sext i1 %169 to i32 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = icmp ne i32 %172, 0 %.73 = select i1 %173, float -1.000000e+00, float 0.000000e+00 %174 = fcmp oge float %158, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = icmp ne i32 %177, 0 %179 = fcmp olt float %.72, 0.000000e+00 %180 = sext i1 %179 to i32 %181 = fcmp olt float %temp32.0, 0.000000e+00 %182 = sext i1 %181 to i32 %183 = fcmp olt float %.73, 0.000000e+00 %184 = sext i1 %183 to i32 %185 = bitcast i32 %180 to float %186 = bitcast i32 %182 to float %187 = bitcast i32 %184 to float %188 = bitcast float %185 to i32 %189 = bitcast float %187 to i32 %190 = or i32 %188, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = bitcast float %186 to i32 %194 = or i32 %192, %193 %195 = bitcast i32 %194 to float %196 = bitcast float %195 to i32 %197 = icmp ne i32 %196, 0 br i1 %197, label %IF69, label %ENDIF68 IF69: ; preds = %ENDIF53 call void @llvm.AMDGPU.kilp() br label %ENDIF68 ENDIF68: ; preds = %ENDIF53, %IF69 %198 = fdiv float 1.000000e+00, %24 %199 = fdiv float 1.000000e+00, %25 %200 = fmul float %198, %87 %201 = fmul float %199, %89 %202 = fmul float 0x3F50624DE0000000, %78 %203 = fmul float 0x3F50624DE0000000, %79 %204 = bitcast float %202 to i32 %205 = bitcast float %203 to i32 %206 = insertelement <2 x i32> undef, i32 %204, i32 0 %207 = insertelement <2 x i32> %206, i32 %205, i32 1 %208 = bitcast <8 x i32> %47 to <32 x i8> %209 = bitcast <4 x i32> %49 to <16 x i8> %210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %207, <32 x i8> %208, <16 x i8> %209, i32 2) %211 = extractelement <4 x float> %210, i32 0 %212 = extractelement <4 x float> %210, i32 1 %213 = extractelement <4 x float> %210, i32 2 %214 = extractelement <4 x float> %210, i32 3 %215 = fmul float %200, 1.000000e+00 %216 = fadd float %215, 0.000000e+00 %217 = fmul float %201, -1.000000e+00 %218 = fadd float %217, 1.000000e+00 %219 = bitcast float %216 to i32 %220 = bitcast float %218 to i32 %221 = insertelement <2 x i32> undef, i32 %219, i32 0 %222 = insertelement <2 x i32> %221, i32 %220, i32 1 %223 = bitcast <8 x i32> %51 to <32 x i8> %224 = bitcast <4 x i32> %53 to <16 x i8> %225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %222, <32 x i8> %223, <16 x i8> %224, i32 2) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = extractelement <4 x float> %225, i32 3 %230 = fmul float 0xBFC2D527E0000000, %211 %231 = fmul float 0xBFD27CAEA0000000, %212 %232 = fadd float %231, %230 %233 = fmul float 0x3FDBE76C80000000, %213 %234 = fadd float %232, %233 %235 = fmul float 0x3FE3AE1480000000, %211 %236 = fmul float 0xBFE07ACC40000000, %212 %237 = fadd float %236, %235 %238 = fmul float 0xBFB99A4160000000, %213 %239 = fadd float %237, %238 %240 = fmul float 0xBFC2D527E0000000, %97 %241 = fmul float 0xBFD27CAEA0000000, %98 %242 = fadd float %241, %240 %243 = fmul float 0x3FDBE76C80000000, %99 %244 = fadd float %242, %243 %245 = fmul float 0x3FE3AE1480000000, %97 %246 = fmul float 0xBFE07ACC40000000, %98 %247 = fadd float %246, %245 %248 = fmul float 0xBFB99A4160000000, %99 %249 = fadd float %247, %248 %250 = call float @llvm.AMDGPU.lrp(float %214, float %234, float %244) %251 = call float @llvm.AMDGPU.lrp(float %214, float %239, float %249) %252 = fmul float 1.000000e+00, %97 %253 = fmul float 0x3FF23CBE60000000, %251 %254 = fadd float %252, %253 %255 = fmul float 1.000000e+00, %97 %256 = fmul float 0xBFD941F220000000, %250 %257 = fadd float %256, %255 %258 = fmul float 0xBFE2944680000000, %251 %259 = fadd float %257, %258 %260 = fmul float 1.000000e+00, %97 %261 = fmul float 0x400041C2E0000000, %250 %262 = fadd float %260, %261 %263 = fmul float %226, %26 %264 = fmul float %227, %26 %265 = fmul float %228, %26 %266 = fsub float -0.000000e+00, %26 %267 = fmul float %229, %266 %268 = fadd float %267, 1.000000e+00 %269 = bitcast float %254 to i32 %270 = bitcast float %259 to i32 %271 = bitcast float %262 to i32 %272 = insertelement <4 x i32> undef, i32 %269, i32 0 %273 = insertelement <4 x i32> %272, i32 %270, i32 1 %274 = insertelement <4 x i32> %273, i32 %271, i32 2 %275 = insertelement <4 x i32> %274, i32 undef, i32 3 %276 = bitcast <8 x i32> %59 to <32 x i8> %277 = bitcast <4 x i32> %61 to <16 x i8> %278 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %275, <32 x i8> %276, <16 x i8> %277, i32 3) %279 = extractelement <4 x float> %278, i32 0 %280 = extractelement <4 x float> %278, i32 1 %281 = extractelement <4 x float> %278, i32 2 %282 = fmul float %279, %268 %283 = fadd float %282, %263 %284 = fmul float %280, %268 %285 = fadd float %284, %264 %286 = fmul float %281, %268 %287 = fadd float %286, %265 %288 = bitcast float %76 to i32 %289 = bitcast float %77 to i32 %290 = insertelement <2 x i32> undef, i32 %288, i32 0 %291 = insertelement <2 x i32> %290, i32 %289, i32 1 %292 = bitcast <8 x i32> %67 to <32 x i8> %293 = bitcast <4 x i32> %69 to <16 x i8> %294 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %291, <32 x i8> %292, <16 x i8> %293, i32 2) %295 = extractelement <4 x float> %294, i32 0 %296 = extractelement <4 x float> %294, i32 1 %297 = extractelement <4 x float> %294, i32 2 %298 = call float @llvm.AMDGPU.lrp(float %152, float %283, float %295) %299 = call float @llvm.AMDGPU.lrp(float %152, float %285, float %296) %300 = call float @llvm.AMDGPU.lrp(float %152, float %287, float %297) %301 = fsub float -0.000000e+00, %152 %302 = fmul float %153, %301 %303 = fadd float %302, %152 %304 = bitcast float %298 to i32 %305 = bitcast float %299 to i32 %306 = bitcast float %300 to i32 %307 = insertelement <4 x i32> undef, i32 %304, i32 0 %308 = insertelement <4 x i32> %307, i32 %305, i32 1 %309 = insertelement <4 x i32> %308, i32 %306, i32 2 %310 = insertelement <4 x i32> %309, i32 undef, i32 3 %311 = bitcast <8 x i32> %71 to <32 x i8> %312 = bitcast <4 x i32> %73 to <16 x i8> %313 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %310, <32 x i8> %311, <16 x i8> %312, i32 3) %314 = extractelement <4 x float> %313, i32 0 %315 = extractelement <4 x float> %313, i32 1 %316 = extractelement <4 x float> %313, i32 2 %317 = call float @llvm.AMDGPU.lrp(float %303, float %314, float %298) %318 = call float @llvm.AMDGPU.lrp(float %303, float %315, float %299) %319 = call float @llvm.AMDGPU.lrp(float %303, float %316, float %300) %320 = fmul float %83, 5.000000e-01 %321 = fadd float %320, 5.000000e-01 %322 = bitcast float %321 to i32 %323 = bitcast float %35 to i32 %324 = insertelement <2 x i32> undef, i32 %322, i32 0 %325 = insertelement <2 x i32> %324, i32 %323, i32 1 %326 = bitcast <8 x i32> %55 to <32 x i8> %327 = bitcast <4 x i32> %57 to <16 x i8> %328 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %325, <32 x i8> %326, <16 x i8> %327, i32 2) %329 = extractelement <4 x float> %328, i32 0 %330 = fsub float -0.000000e+00, %27 %331 = fadd float %330, %78 %332 = fsub float -0.000000e+00, %28 %333 = fadd float %332, %79 %334 = fsub float -0.000000e+00, %29 %335 = fadd float %334, %80 %336 = fmul float %331, %331 %337 = fmul float %333, %333 %338 = fadd float %337, %336 %339 = fmul float %335, %335 %340 = fadd float %338, %339 %341 = fmul float %340, %34 %342 = fmul float %335, %33 %343 = fmul float %342, 0x3FF7154CA0000000 %344 = call float @llvm.AMDIL.exp.(float %343) %345 = fsub float -0.000000e+00, %344 %346 = fadd float %345, 1.000000e+00 %347 = fmul float %346, %341 %348 = fdiv float 1.000000e+00, %335 %349 = fmul float %348, %347 %350 = fmul float %349, 0x3FF7154CA0000000 %351 = call float @llvm.AMDIL.exp.(float %350) %352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00) %353 = fsub float -0.000000e+00, %352 %354 = fadd float %353, 1.000000e+00 %355 = fmul float %354, %329 %356 = fsub float -0.000000e+00, %317 %357 = fadd float %356, %30 %358 = fsub float -0.000000e+00, %318 %359 = fadd float %358, %31 %360 = fsub float -0.000000e+00, %319 %361 = fadd float %360, %32 %362 = fmul float %355, %357 %363 = fadd float %362, %317 %364 = fmul float %355, %359 %365 = fadd float %364, %318 %366 = fmul float %355, %361 %367 = fadd float %366, %319 %368 = call i32 @llvm.SI.packf16(float %363, float %365) %369 = bitcast i32 %368 to float %370 = call i32 @llvm.SI.packf16(float %367, float %100) %371 = bitcast i32 %370 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %369, float %371, float %369, float %371) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v5, v0, 1, 0, [m0] ; C8140100 V_INTERP_P2_F32 v5, [v5], v1, 1, 0, [m0] ; C8150101 V_INTERP_P1_F32 v4, v0, 0, 0, [m0] ; C8100000 V_INTERP_P2_F32 v4, [v4], v1, 0, 0, [m0] ; C8110001 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800F00 00640404 V_MOV_B32_e32 v8, -1.000000e-01 ; 7E1002FF BDCCCCCD S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v8, v7, v8 ; 06101107 V_CMP_GE_F32_e64 s[0:1], v8, 0.000000e+00, 0, 0 ; D00C0000 00010108 V_CNDMASK_B32_e64 v8, -1.000000e+00, 0, s[0:1], 0, 0, 0, 0 ; D2000008 000100F3 V_CMP_LT_F32_e64 s[0:1], v8, 0.000000e+00, 0, 0 ; D0020000 00010108 V_CNDMASK_B32_e64 v8, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000008 00018280 V_OR_B32_e32 v8, v8, v8 ; 38101108 V_CMP_NE_I32_e64 s[0:1], v8, 0, 0, 0 ; D10A0000 00010108 V_INTERP_P1_F32 v8, v0, 1, 3, [m0] ; C8200D00 V_INTERP_P2_F32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 V_INTERP_P1_F32 v22, v0, 1, 2, [m0] ; C8580900 V_INTERP_P2_F32 v22, [v22], v1, 1, 2, [m0] ; C8590901 V_INTERP_P1_F32 v23, v0, 0, 2, [m0] ; C85C0800 V_INTERP_P2_F32 v23, [v23], v1, 0, 2, [m0] ; C85D0801 V_INTERP_P1_F32 v9, v0, 2, 1, [m0] ; C8240600 V_INTERP_P2_F32 v9, [v9], v1, 2, 1, [m0] ; C8250601 V_INTERP_P1_F32 v10, v0, 1, 1, [m0] ; C8280500 V_INTERP_P2_F32 v10, [v10], v1, 1, 1, [m0] ; C8290501 V_INTERP_P1_F32 v11, v0, 0, 1, [m0] ; C82C0400 V_INTERP_P2_F32 v11, [v11], v1, 0, 1, [m0] ; C82D0401 V_INTERP_P1_F32 v15, v0, 3, 0, [m0] ; C83C0300 V_INTERP_P2_F32 v15, [v15], v1, 3, 0, [m0] ; C83D0301 V_INTERP_P1_F32 v14, v0, 2, 0, [m0] ; C8380200 V_INTERP_P2_F32 v14, [v14], v1, 2, 0, [m0] ; C8390201 S_LOAD_DWORDX4 s[24:27], s[2:3], 0x0 ; C08C0300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[24:27], 0x39 ; C2011939 S_BUFFER_LOAD_DWORD s3, s[24:27], 0x38 ; C2019938 S_BUFFER_LOAD_DWORD s8, s[24:27], 0x17 ; C2041917 S_BUFFER_LOAD_DWORD s9, s[24:27], 0x16 ; C2049916 S_BUFFER_LOAD_DWORD s10, s[24:27], 0x15 ; C2051915 S_BUFFER_LOAD_DWORD s11, s[24:27], 0x14 ; C2059914 S_BUFFER_LOAD_DWORD s12, s[24:27], 0x13 ; C2061913 S_BUFFER_LOAD_DWORD s13, s[24:27], 0x11 ; C2069911 S_BUFFER_LOAD_DWORD s14, s[24:27], 0x10 ; C2071910 S_BUFFER_LOAD_DWORD s15, s[24:27], 0xe ; C207990E S_BUFFER_LOAD_DWORD s16, s[24:27], 0xd ; C208190D S_BUFFER_LOAD_DWORD s17, s[24:27], 0xc ; C208990C S_BUFFER_LOAD_DWORD s18, s[24:27], 0xa ; C209190A S_BUFFER_LOAD_DWORD s19, s[24:27], 0x9 ; C2099909 S_BUFFER_LOAD_DWORD s20, s[24:27], 0x8 ; C20A1908 S_BUFFER_LOAD_DWORD s21, s[24:27], 0x4 ; C20A9904 S_BUFFER_LOAD_DWORD s22, s[24:27], 0x1 ; C20B1901 S_BUFFER_LOAD_DWORD s23, s[24:27], 0x0 ; C20B9900 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MOV_B32_e32 v27, s2 ; 7E360202 V_MOV_B32_e32 v29, s3 ; 7E3A0203 V_MOV_B32_e32 v28, s8 ; 7E380208 V_MOV_B32_e32 v31, s9 ; 7E3E0209 V_MOV_B32_e32 v30, s10 ; 7E3C020A V_MOV_B32_e32 v32, s11 ; 7E40020B V_MOV_B32_e32 v13, s12 ; 7E1A020C V_MOV_B32_e32 v17, s13 ; 7E22020D V_MOV_B32_e32 v16, s14 ; 7E20020E V_MOV_B32_e32 v0, s15 ; 7E00020F V_MOV_B32_e32 v19, s16 ; 7E260210 V_MOV_B32_e32 v1, s17 ; 7E020211 V_MOV_B32_e32 v18, s18 ; 7E240212 V_MOV_B32_e32 v21, s19 ; 7E2A0213 V_MOV_B32_e32 v20, s20 ; 7E280214 V_MOV_B32_e32 v26, s21 ; 7E340215 V_MOV_B32_e32 v33, s22 ; 7E420216 V_MOV_B32_e32 v34, s23 ; 7E440217 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v24, 1.000000e+00, v22 ; 08302CF2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x28 ; C0C40728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[8:15], s[0:3] ; F0800F00 00021617 V_MOV_B32_e32 v35, 0x80000000 ; 7E4602FF 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 V_OR_B32_e32 v35, v22, v35 ; 38464716 V_CMP_GE_F32_e64 s[0:1], v35, 0.000000e+00, 0, 0 ; D00C0000 00010123 V_CNDMASK_B32_e64 v35, 0, -1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000023 0001E680 V_CMP_LT_F32_e64 s[0:1], v35, 0.000000e+00, 0, 0 ; D0020000 00010123 V_CNDMASK_B32_e64 v35, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000023 00018280 V_OR_B32_e32 v35, v35, v35 ; 38464723 V_CMP_NE_I32_e64 s[0:1], v35, 0, 0, 0 ; D10A0000 00010123 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MUL_F32_e32 v36, 1.000000e-03, v10 ; 104814FF 3A83126F V_MUL_F32_e32 v35, 1.000000e-03, v11 ; 104616FF 3A83126F S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[8:15], s[0:3] ; F0800F00 00022323 V_MOV_B32_e32 v39, 6.150000e-01 ; 7E4E02FF 3F1D70A4 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v40, v35, v39 ; 10504F23 V_MOV_B32_e32 v41, -5.149900e-01 ; 7E5202FF BF03D662 V_MAD_F32 v40, v36, v41, v40, 0, 0 ; D2820028 04A25324 V_MOV_B32_e32 v42, -1.000100e-01 ; 7E5402FF BDCCD20B V_MAD_F32 v40, v37, v42, v40, 0, 0 ; D2820028 04A25525 V_MUL_F32_e32 v39, v4, v39 ; 104E4F04 V_MAD_F32 v39, v5, v41, v39, 0, 0 ; D2820027 049E5305 V_MAD_F32 v39, v6, v42, v39, 0, 0 ; D2820027 049E5506 V_SUB_F32_e32 v41, 1.000000e+00, v38 ; 08524CF2 V_MUL_F32_e32 v39, v41, v39 ; 104E4F29 V_MAD_F32 v39, v38, v40, v39, 0, 0 ; D2820027 049E5126 V_MOV_B32_e32 v40, -1.471300e-01 ; 7E5002FF BE16A93F V_MUL_F32_e32 v42, v35, v40 ; 10545123 V_MOV_B32_e32 v43, -2.888600e-01 ; 7E5602FF BE93E575 V_MAD_F32 v42, v36, v43, v42, 0, 0 ; D282002A 04AA5724 V_MOV_B32_e32 v44, 4.360000e-01 ; 7E5802FF 3EDF3B64 V_MAD_F32 v42, v37, v44, v42, 0, 0 ; D282002A 04AA5925 V_MUL_F32_e32 v40, v4, v40 ; 10505104 V_MAD_F32 v40, v5, v43, v40, 0, 0 ; D2820028 04A25705 V_MAD_F32 v40, v6, v44, v40, 0, 0 ; D2820028 04A25906 V_MUL_F32_e32 v40, v41, v40 ; 10505129 V_MAD_F32 v35, v38, v42, v40, 0, 0 ; D2820023 04A25526 V_MOV_B32_e32 v36, -3.946500e-01 ; 7E4802FF BECA0F91 V_MAD_F32 v36, v35, v36, v4, 0, 0 ; D2820024 04124923 V_MOV_B32_e32 v37, -5.806000e-01 ; 7E4A02FF BF14A234 V_MAD_F32 v41, v39, v37, v36, 0, 0 ; D2820029 04924B27 V_MOV_B32_e32 v36, 2.032110e+00 ; 7E4802FF 40020E17 V_MAD_F32 v42, v35, v36, v4, 0, 0 ; D282002A 04124923 V_MOV_B32_e32 v35, 1.139830e+00 ; 7E4602FF 3F91E5F3 V_MAD_F32 v40, v39, v35, v4, 0, 0 ; D2820028 04124727 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x20 ; C0C40720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[8:15], s[0:3] ; F0800700 00022328 V_MAD_F32 v2, v2, v32, v31, 0, 0 ; D2820002 047E4102 V_RCP_F32_e32 v31, v34 ; 7E3E5522 V_MUL_F32_e32 v31, v31, v2 ; 103E051F V_MAD_F32 v2, v3, v29, v27, 0, 0 ; D2820002 046E3B03 V_MAD_F32 v2, v2, v30, v28, 0, 0 ; D2820002 04723D02 V_RCP_F32_e32 v3, v33 ; 7E065521 V_MUL_F32_e32 v2, v3, v2 ; 10040503 V_SUB_F32_e32 v32, 1.000000e+00, v2 ; 084004F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[8:15], s[0:3] ; F0800F00 00021B1F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v29, v26 ; 1004351D V_MUL_F32_e32 v3, v30, v26 ; 1006351E V_SUB_F32_e32 v3, 1.000000e+00, v3 ; 080606F2 V_MAD_F32 v2, v37, v3, v2, 0, 0 ; D2820002 040A0725 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x30 ; C0C40730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800700 00021F0E V_SUB_F32_e32 v14, 1.000000e+00, v22 ; 081C2CF2 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v15, v14, v33 ; 101E430E V_MAD_F32 v40, v22, v2, v15, 0, 0 ; D2820028 043E0516 V_MUL_F32_e32 v2, v28, v26 ; 1004351C V_MAD_F32 v2, v36, v3, v2, 0, 0 ; D2820002 040A0724 V_MUL_F32_e32 v15, v14, v32 ; 101E410E V_MAD_F32 v39, v22, v2, v15, 0, 0 ; D2820027 043E0516 V_MUL_F32_e32 v2, v27, v26 ; 1004351B V_MAD_F32 v2, v35, v3, v2, 0, 0 ; D2820002 040A0723 V_MUL_F32_e32 v3, v14, v31 ; 10063F0E V_MAD_F32 v38, v22, v2, v3, 0, 0 ; D2820026 040E0516 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x38 ; C0C40738 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[38:41], s[8:15], s[0:3] ; F0800700 00021A26 V_MUL_F32_e32 v2, v24, v22 ; 10042D18 V_SUB_F32_e32 v2, v22, v2 ; 08040516 V_SUB_F32_e32 v3, 1.000000e+00, v2 ; 080604F2 V_MUL_F32_e32 v14, v3, v39 ; 101C4F03 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v2, v27, v14, 0, 0 ; D282000E 043A3702 V_SUB_F32_e32 v15, v19, v14 ; 081E1D13 V_SUB_F32_e32 v10, v10, v21 ; 08142B0A V_SUB_F32_e32 v11, v11, v20 ; 0816290B V_MUL_F32_e32 v11, v11, v11 ; 1016170B V_MAD_F32 v10, v10, v10, v11, 0, 0 ; D282000A 042E150A V_SUB_F32_e32 v9, v9, v18 ; 08122509 V_MAD_F32 v10, v9, v9, v10, 0, 0 ; D282000A 042A1309 V_MUL_F32_e32 v10, v10, v17 ; 1014230A V_MUL_F32_e32 v11, v9, v16 ; 10162109 V_MUL_F32_e32 v11, 1.442700e+00, v11 ; 101616FF 3FB8AA65 V_EXP_F32_e32 v11, v11 ; 7E164B0B V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_MUL_F32_e32 v10, v11, v10 ; 1014150B V_RCP_F32_e32 v9, v9 ; 7E125509 V_MUL_F32_e32 v9, v9, v10 ; 10121509 V_MUL_F32_e32 v9, 1.442700e+00, v9 ; 101212FF 3FB8AA65 V_EXP_F32_e32 v9, v9 ; 7E124B09 V_ADD_F32_e64 v9, v9, 0, 1, 0 ; D2060809 00010109 V_SUB_F32_e32 v9, 1.000000e+00, v9 ; 081212F2 V_MAD_F32 v12, v8, 5.000000e-01, 5.000000e-01, 0, 0 ; D282000C 03C1E108 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[4:11], s[6:7], 0x18 ; C0C20718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v8, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[4:11], s[0:3] ; F0800100 0001080C S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v8, v9, v8 ; 10101109 V_MAD_F32 v9, v8, v15, v14, 0, 0 ; D2820009 043A1F08 V_MUL_F32_e32 v10, v3, v38 ; 10144D03 V_MAD_F32 v10, v2, v26, v10, 0, 0 ; D282000A 042A3502 V_SUB_F32_e32 v1, v1, v10 ; 08021501 V_MAD_F32 v1, v8, v1, v10, 0, 0 ; D2820001 042A0308 V_CVT_PKRTZ_F16_F32_e32 v1, v1, v9 ; 5E021301 V_MUL_F32_e32 v3, v3, v40 ; 10065103 V_MAD_F32 v2, v2, v28, v3, 0, 0 ; D2820002 040E3902 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_MAD_F32 v0, v8, v0, v2, 0, 0 ; D2820000 040A0108 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v7 ; 5E000F00 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %429, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %503, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %505, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %507, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %566, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %568, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %570, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = fcmp uge float %399, 0x3E7AD7F2A0000000 %401 = select i1 %400, float %399, float 0x3E7AD7F2A0000000 %402 = call float @llvm.AMDGPU.rsq.clamped.f32(float %401) %403 = fmul float %402, %temp12.0 %404 = fmul float %402, %temp13.0 %405 = fmul float %402, %temp14.0 %406 = fdiv float 1.000000e+00, %394 %407 = fmul float %391, %406 %408 = fmul float %392, %406 %409 = fadd float %temp8.0, %39 %410 = fadd float %temp9.0, %40 %411 = fmul float %409, %37 %412 = fmul float %410, %38 %413 = fmul float %temp9.0, %31 %414 = fmul float %temp9.0, %32 %415 = fmul float %temp8.0, %29 %416 = fadd float %415, %413 %417 = fmul float %temp8.0, %30 %418 = fadd float %417, %414 %419 = fmul float %temp10.0, %33 %420 = fadd float %419, %416 %421 = fmul float %temp10.0, %34 %422 = fadd float %421, %418 %423 = fadd float %420, %35 %424 = fadd float %422, %36 %425 = fmul float %423, 5.000000e-01 %426 = fadd float %425, -5.000000e-01 %427 = fmul float %424, -5.000000e-01 %428 = fadd float %427, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %403, float %404, float %405, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %411, float %412) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %407, float %408, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %426, float %428, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %429 = fmul float 3.000000e+00, %61 %430 = fptosi float %429 to i32 %431 = bitcast i32 %430 to float %432 = bitcast float %431 to i32 %433 = shl i32 %432, 4 %434 = add i32 %433, 144 %435 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %434) %436 = shl i32 %432, 4 %437 = add i32 %436, 148 %438 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %437) %439 = shl i32 %432, 4 %440 = add i32 %439, 152 %441 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %440) %442 = shl i32 %432, 4 %443 = add i32 %442, 156 %444 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %443) %445 = fmul float %77, %435 %446 = fmul float %79, %438 %447 = fadd float %445, %446 %448 = fmul float %81, %441 %449 = fadd float %447, %448 %450 = fmul float %83, %444 %451 = fadd float %449, %450 %452 = fptosi float %429 to i32 %453 = bitcast i32 %452 to float %454 = bitcast float %453 to i32 %455 = add i32 1, %454 %456 = bitcast i32 %455 to float %457 = bitcast float %456 to i32 %458 = shl i32 %457, 4 %459 = add i32 %458, 144 %460 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %459) %461 = shl i32 %457, 4 %462 = add i32 %461, 148 %463 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %462) %464 = shl i32 %457, 4 %465 = add i32 %464, 152 %466 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %465) %467 = shl i32 %457, 4 %468 = add i32 %467, 156 %469 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %468) %470 = fmul float %77, %460 %471 = fmul float %79, %463 %472 = fadd float %470, %471 %473 = fmul float %81, %466 %474 = fadd float %472, %473 %475 = fmul float %83, %469 %476 = fadd float %474, %475 %477 = fptosi float %429 to i32 %478 = bitcast i32 %477 to float %479 = bitcast float %478 to i32 %480 = add i32 2, %479 %481 = bitcast i32 %480 to float %482 = bitcast float %481 to i32 %483 = shl i32 %482, 4 %484 = add i32 %483, 144 %485 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %484) %486 = shl i32 %482, 4 %487 = add i32 %486, 148 %488 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %487) %489 = shl i32 %482, 4 %490 = add i32 %489, 152 %491 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %490) %492 = shl i32 %482, 4 %493 = add i32 %492, 156 %494 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %493) %495 = fmul float %77, %485 %496 = fmul float %79, %488 %497 = fadd float %495, %496 %498 = fmul float %81, %491 %499 = fadd float %497, %498 %500 = fmul float %83, %494 %501 = fadd float %499, %500 %502 = fmul float %54, %451 %503 = fadd float %502, %298 %504 = fmul float %54, %476 %505 = fadd float %504, %300 %506 = fmul float %54, %501 %507 = fadd float %506, %302 %508 = fptosi float %429 to i32 %509 = bitcast i32 %508 to float %510 = bitcast float %509 to i32 %511 = shl i32 %510, 4 %512 = add i32 %511, 144 %513 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %512) %514 = shl i32 %510, 4 %515 = add i32 %514, 148 %516 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %515) %517 = shl i32 %510, 4 %518 = add i32 %517, 152 %519 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %518) %520 = fmul float %66, %513 %521 = fmul float %67, %516 %522 = fadd float %521, %520 %523 = fmul float %68, %519 %524 = fadd float %522, %523 %525 = fptosi float %429 to i32 %526 = bitcast i32 %525 to float %527 = bitcast float %526 to i32 %528 = add i32 1, %527 %529 = bitcast i32 %528 to float %530 = bitcast float %529 to i32 %531 = shl i32 %530, 4 %532 = add i32 %531, 144 %533 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %532) %534 = shl i32 %530, 4 %535 = add i32 %534, 148 %536 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %535) %537 = shl i32 %530, 4 %538 = add i32 %537, 152 %539 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %538) %540 = fmul float %66, %533 %541 = fmul float %67, %536 %542 = fadd float %541, %540 %543 = fmul float %68, %539 %544 = fadd float %542, %543 %545 = fptosi float %429 to i32 %546 = bitcast i32 %545 to float %547 = bitcast float %546 to i32 %548 = add i32 2, %547 %549 = bitcast i32 %548 to float %550 = bitcast float %549 to i32 %551 = shl i32 %550, 4 %552 = add i32 %551, 144 %553 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %552) %554 = shl i32 %550, 4 %555 = add i32 %554, 148 %556 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %555) %557 = shl i32 %550, 4 %558 = add i32 %557, 152 %559 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %558) %560 = fmul float %66, %553 %561 = fmul float %67, %556 %562 = fadd float %561, %560 %563 = fmul float %68, %559 %564 = fadd float %562, %563 %565 = fmul float %54, %524 %566 = fadd float %565, %361 %567 = fmul float %54, %544 %568 = fadd float %567, %363 %569 = fmul float %54, %564 %570 = fadd float %569, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v6, s10, v0 ; 4A0C000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[19:22], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011306 V_MOV_B32_e32 v0, 3.000000e+00 ; 7E0002FF 40400000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v19, v0 ; 10000113 V_CVT_I32_F32_e32 v0, v0 ; 7E001100 V_LSHLREV_B32_e32 v2, 4, v0 ; 34040084 V_ADD_I32_e32 v1, 0x90, v2 ; 4A0204FF 00000090 S_LOAD_DWORDX4 s[4:7], s[2:3], 0x0 ; C0820300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[4:7] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010301 S_LOAD_DWORDX4 s[0:3], s[8:9], 0xc ; C080090C S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[15:18], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000F06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v15, v3 ; 1002070F V_ADD_I32_e32 v4, 0x94, v2 ; 4A0804FF 00000094 BUFFER_LOAD_DWORD v7, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010704 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v16, v7, v1, 0, 0 ; D2820001 04060F10 V_ADD_I32_e32 v4, 0x98, v2 ; 4A0804FF 00000098 BUFFER_LOAD_DWORD v8, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010804 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v17, v8, v1, 0, 0 ; D2820001 04061111 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[11:14], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000B06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_ADD_I32_e32 v4, 2, v0 ; 4A080082 V_LSHLREV_B32_e32 v9, 4, v4 ; 34120884 V_ADD_I32_e32 v4, 0x90, v9 ; 4A0812FF 00000090 BUFFER_LOAD_DWORD v10, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010A04 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v15, v10 ; 1008150F V_ADD_I32_e32 v5, 0x94, v9 ; 4A0A12FF 00000094 BUFFER_LOAD_DWORD v27, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011B05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v16, v27, v4, 0, 0 ; D2820004 04123710 V_ADD_I32_e32 v5, 0x98, v9 ; 4A0A12FF 00000098 BUFFER_LOAD_DWORD v28, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011C05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v17, v28, v4, 0, 0 ; D2820004 04123911 V_MUL_F32_e32 v4, v4, v11 ; 10081704 V_ADD_I32_e32 v0, 1, v0 ; 4A000081 V_LSHLREV_B32_e32 v29, 4, v0 ; 343A0084 V_ADD_I32_e32 v0, 0x90, v29 ; 4A003AFF 00000090 BUFFER_LOAD_DWORD v30, s[4:7] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E00 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v15, v30 ; 10003D0F V_ADD_I32_e32 v5, 0x94, v29 ; 4A0A3AFF 00000094 BUFFER_LOAD_DWORD v31, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v16, v31, v0, 0, 0 ; D2820000 04023F10 V_ADD_I32_e32 v5, 0x98, v29 ; 4A0A3AFF 00000098 BUFFER_LOAD_DWORD v32, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012005 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v17, v32, v0, 0, 0 ; D2820000 04024111 V_MUL_F32_e32 v5, v0, v11 ; 100A1700 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[23:26], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80001706 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v24, v7 ; 10000F18 V_MAD_F32 v0, v23, v3, v0, 0, 0 ; D2820000 04020717 V_MAD_F32 v0, v25, v8, v0, 0, 0 ; D2820000 04021119 V_ADD_I32_e32 v2, 0x9c, v2 ; 4A0404FF 0000009C BUFFER_LOAD_DWORD v2, s[4:7] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010202 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v2 ; 06000500 V_MUL_F32_e32 v0, v0, v11 ; 10001700 V_MUL_F32_e32 v2, v24, v27 ; 10043718 V_MAD_F32 v2, v23, v10, v2, 0, 0 ; D2820002 040A1517 V_MAD_F32 v2, v25, v28, v2, 0, 0 ; D2820002 040A3919 V_ADD_I32_e32 v3, 0x9c, v9 ; 4A0612FF 0000009C BUFFER_LOAD_DWORD v3, s[4:7] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010303 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v2, v2, v3 ; 06040702 V_MUL_F32_e32 v2, v2, v11 ; 10041702 V_MUL_F32_e32 v3, v24, v31 ; 10063F18 V_MAD_F32 v3, v23, v30, v3, 0, 0 ; D2820003 040E3D17 V_MAD_F32 v3, v25, v32, v3, 0, 0 ; D2820003 040E4119 V_ADD_I32_e32 v7, 0x9c, v29 ; 4A0E3AFF 0000009C BUFFER_LOAD_DWORD v7, s[4:7] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010707 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v3, v3, v7 ; 06060F03 V_MUL_F32_e32 v3, v3, v11 ; 10061703 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x10 ; C0800910 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000606 V_CMP_GT_F32_e64 s[0:1], v12, 0.000000e+00, 0, 0 ; D0080000 0001010C V_MOV_B32_e32 v10, 0.000000e+00 ; 7E140280 V_MOV_B32_e32 v27, 1.000000e+00 ; 7E3602F2 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_MOV_B32_e32 v10, 3.000000e+00 ; 7E1402FF 40400000 V_MUL_F32_e32 v10, v20, v10 ; 10141514 V_CVT_I32_F32_e32 v28, v10 ; 7E38110A V_LSHLREV_B32_e32 v29, 4, v28 ; 343A3884 V_ADD_I32_e32 v30, 0x94, v29 ; 4A3C3AFF 00000094 BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E V_ADD_I32_e32 v31, 0x90, v29 ; 4A3E3AFF 00000090 BUFFER_LOAD_DWORD v31, s[4:7] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F1F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v32, v15, v31 ; 10403F0F V_MAD_F32 v32, v16, v30, v32, 0, 0 ; D2820020 04823D10 V_ADD_I32_e32 v33, 0x98, v29 ; 4A423AFF 00000098 BUFFER_LOAD_DWORD v33, s[4:7] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012121 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v32, v17, v33, v32, 0, 0 ; D2820020 04824311 V_MAD_F32 v1, v12, v32, v1, 0, 0 ; D2820001 0406410C V_ADD_I32_e32 v32, 2, v28 ; 4A403882 V_LSHLREV_B32_e32 v32, 4, v32 ; 34404084 V_ADD_I32_e32 v34, 0x94, v32 ; 4A4440FF 00000094 BUFFER_LOAD_DWORD v34, s[4:7] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012222 V_ADD_I32_e32 v35, 0x90, v32 ; 4A4640FF 00000090 BUFFER_LOAD_DWORD v35, s[4:7] + v35 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012323 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v36, v15, v35 ; 1048470F V_MAD_F32 v36, v16, v34, v36, 0, 0 ; D2820024 04924510 V_ADD_I32_e32 v37, 0x98, v32 ; 4A4A40FF 00000098 BUFFER_LOAD_DWORD v37, s[4:7] + v37 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012525 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v36, v17, v37, v36, 0, 0 ; D2820024 04924B11 V_MAD_F32 v4, v12, v36, v4, 0, 0 ; D2820004 0412490C V_ADD_I32_e32 v28, 1, v28 ; 4A383881 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v36, 0x94, v28 ; 4A4838FF 00000094 BUFFER_LOAD_DWORD v36, s[4:7] + v36 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012424 V_ADD_I32_e32 v38, 0x90, v28 ; 4A4C38FF 00000090 BUFFER_LOAD_DWORD v38, s[4:7] + v38 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012626 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v39, v15, v38 ; 104E4D0F V_MAD_F32 v39, v16, v36, v39, 0, 0 ; D2820027 049E4910 V_ADD_I32_e32 v40, 0x98, v28 ; 4A5038FF 00000098 BUFFER_LOAD_DWORD v40, s[4:7] + v40 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012828 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v39, v17, v40, v39, 0, 0 ; D2820027 049E5111 V_MAD_F32 v5, v12, v39, v5, 0, 0 ; D2820005 04164F0C V_MUL_F32_e32 v30, v24, v30 ; 103C3D18 V_MAD_F32 v30, v23, v31, v30, 0, 0 ; D282001E 047A3F17 V_MAD_F32 v30, v25, v33, v30, 0, 0 ; D282001E 047A4319 V_ADD_I32_e32 v29, 0x9c, v29 ; 4A3A3AFF 0000009C BUFFER_LOAD_DWORD v29, s[4:7] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011D1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v27, v29, v30, 0, 0 ; D282001D 047A3B1B V_MAD_F32 v0, v12, v29, v0, 0, 0 ; D2820000 04023B0C V_MUL_F32_e32 v29, v24, v34 ; 103A4518 V_MAD_F32 v29, v23, v35, v29, 0, 0 ; D282001D 04764717 V_MAD_F32 v29, v25, v37, v29, 0, 0 ; D282001D 04764B19 V_ADD_I32_e32 v30, 0x9c, v32 ; 4A3C40FF 0000009C BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v27, v30, v29, 0, 0 ; D282001D 04763D1B V_MAD_F32 v2, v12, v29, v2, 0, 0 ; D2820002 040A3B0C V_MUL_F32_e32 v29, v24, v36 ; 103A4918 V_MAD_F32 v29, v23, v38, v29, 0, 0 ; D282001D 04764D17 V_MAD_F32 v29, v25, v40, v29, 0, 0 ; D282001D 04765119 V_ADD_I32_e32 v28, 0x9c, v28 ; 4A3838FF 0000009C BUFFER_LOAD_DWORD v28, s[4:7] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011C1C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v27, v28, v29, 0, 0 ; D282001C 0476391B V_MAD_F32 v3, v12, v28, v3, 0, 0 ; D2820003 040E390C V_CMP_GT_F32_e64 s[2:3], v13, 0.000000e+00, 0, 0 ; D0080002 0001010D S_AND_SAVEEXEC_B64 s[2:3], s[2:3] ; BE822402 S_XOR_B64 s[2:3], exec, s[2:3] ; 8982027E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_MOV_B32_e32 v10, 3.000000e+00 ; 7E1402FF 40400000 V_MUL_F32_e32 v10, v21, v10 ; 10141515 V_CVT_I32_F32_e32 v19, v10 ; 7E26110A V_LSHLREV_B32_e32 v20, 4, v19 ; 34282684 V_ADD_I32_e32 v21, 0x94, v20 ; 4A2A28FF 00000094 BUFFER_LOAD_DWORD v21, s[4:7] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011515 V_ADD_I32_e32 v22, 0x90, v20 ; 4A2C28FF 00000090 BUFFER_LOAD_DWORD v22, s[4:7] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011616 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MUL_F32_e32 v28, v15, v22 ; 10382D0F V_MAD_F32 v28, v16, v21, v28, 0, 0 ; D282001C 04722B10 V_ADD_I32_e32 v29, 0x98, v20 ; 4A3A28FF 00000098 BUFFER_LOAD_DWORD v29, s[4:7] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011D1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v17, v29, v28, 0, 0 ; D282001C 04723B11 V_MAD_F32 v1, v13, v28, v1, 0, 0 ; D2820001 0406390D V_ADD_I32_e32 v28, 2, v19 ; 4A382682 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v30, 0x94, v28 ; 4A3C38FF 00000094 BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E V_ADD_I32_e32 v31, 0x90, v28 ; 4A3E38FF 00000090 BUFFER_LOAD_DWORD v31, s[4:7] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F1F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v32, v15, v31 ; 10403F0F V_MAD_F32 v32, v16, v30, v32, 0, 0 ; D2820020 04823D10 V_ADD_I32_e32 v33, 0x98, v28 ; 4A4238FF 00000098 BUFFER_LOAD_DWORD v33, s[4:7] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012121 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v32, v17, v33, v32, 0, 0 ; D2820020 04824311 V_MAD_F32 v4, v13, v32, v4, 0, 0 ; D2820004 0412410D V_ADD_I32_e32 v19, 1, v19 ; 4A262681 V_LSHLREV_B32_e32 v19, 4, v19 ; 34262684 V_ADD_I32_e32 v32, 0x94, v19 ; 4A4026FF 00000094 BUFFER_LOAD_DWORD v32, s[4:7] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012020 V_ADD_I32_e32 v34, 0x90, v19 ; 4A4426FF 00000090 BUFFER_LOAD_DWORD v34, s[4:7] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012222 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v35, v15, v34 ; 1046450F V_MAD_F32 v35, v16, v32, v35, 0, 0 ; D2820023 048E4110 V_ADD_I32_e32 v36, 0x98, v19 ; 4A4826FF 00000098 BUFFER_LOAD_DWORD v36, s[4:7] + v36 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012424 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v17, v36, v35, 0, 0 ; D282000F 048E4911 V_MAD_F32 v5, v13, v15, v5, 0, 0 ; D2820005 04161F0D V_MUL_F32_e32 v15, v24, v21 ; 101E2B18 V_MAD_F32 v15, v23, v22, v15, 0, 0 ; D282000F 043E2D17 V_MAD_F32 v15, v25, v29, v15, 0, 0 ; D282000F 043E3B19 V_ADD_I32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v0, v13, v15, v0, 0, 0 ; D2820000 04021F0D V_MUL_F32_e32 v15, v24, v30 ; 101E3D18 V_MAD_F32 v15, v23, v31, v15, 0, 0 ; D282000F 043E3F17 V_MAD_F32 v15, v25, v33, v15, 0, 0 ; D282000F 043E4319 V_ADD_I32_e32 v16, 0x9c, v28 ; 4A2038FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v2, v13, v15, v2, 0, 0 ; D2820002 040A1F0D V_MUL_F32_e32 v15, v24, v32 ; 101E4118 V_MAD_F32 v15, v23, v34, v15, 0, 0 ; D282000F 043E4517 V_MAD_F32 v15, v25, v36, v15, 0, 0 ; D282000F 043E4919 V_ADD_I32_e32 v16, 0x9c, v19 ; 4A2026FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v3, v13, v15, v3, 0, 0 ; D2820003 040E1F0D S_OR_B64 exec, exec, s[2:3] ; 88FE027E S_OR_B64 exec, exec, s[0:1] ; 88FE007E S_BUFFER_LOAD_DWORD s2, s[4:7], 0x23 ; C2010523 S_BUFFER_LOAD_DWORD s3, s[4:7], 0x22 ; C2018522 S_BUFFER_LOAD_DWORD s8, s[4:7], 0x21 ; C2040521 S_BUFFER_LOAD_DWORD s9, s[4:7], 0x20 ; C2048520 S_BUFFER_LOAD_DWORD s10, s[4:7], 0x1d ; C205051D S_BUFFER_LOAD_DWORD s11, s[4:7], 0x1c ; C205851C S_BUFFER_LOAD_DWORD s12, s[4:7], 0x19 ; C2060519 S_BUFFER_LOAD_DWORD s13, s[4:7], 0x18 ; C2068518 S_BUFFER_LOAD_DWORD s14, s[4:7], 0x15 ; C2070515 S_BUFFER_LOAD_DWORD s15, s[4:7], 0x14 ; C2078514 S_BUFFER_LOAD_DWORD s16, s[4:7], 0x11 ; C2080511 S_BUFFER_LOAD_DWORD s17, s[4:7], 0x10 ; C2088510 S_BUFFER_LOAD_DWORD s18, s[4:7], 0xf ; C209050F S_BUFFER_LOAD_DWORD s19, s[4:7], 0xe ; C209850E S_BUFFER_LOAD_DWORD s20, s[4:7], 0xd ; C20A050D S_BUFFER_LOAD_DWORD s21, s[4:7], 0xc ; C20A850C S_BUFFER_LOAD_DWORD s22, s[4:7], 0xb ; C20B050B S_BUFFER_LOAD_DWORD s23, s[4:7], 0xa ; C20B850A S_BUFFER_LOAD_DWORD s24, s[4:7], 0x9 ; C20C0509 S_BUFFER_LOAD_DWORD s25, s[4:7], 0x8 ; C20C8508 S_BUFFER_LOAD_DWORD s26, s[4:7], 0x7 ; C20D0507 S_BUFFER_LOAD_DWORD s27, s[4:7], 0x6 ; C20D8506 S_BUFFER_LOAD_DWORD s28, s[4:7], 0x5 ; C20E0505 S_BUFFER_LOAD_DWORD s29, s[4:7], 0x4 ; C20E8504 S_BUFFER_LOAD_DWORD s30, s[4:7], 0x3 ; C20F0503 S_BUFFER_LOAD_DWORD s31, s[4:7], 0x2 ; C20F8502 S_BUFFER_LOAD_DWORD s32, s[4:7], 0x1 ; C2100501 S_BUFFER_LOAD_DWORD s33, s[4:7], 0x0 ; C2108500 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v11, s2 ; 7E160202 V_MOV_B32_e32 v12, s3 ; 7E180203 V_MOV_B32_e32 v13, s8 ; 7E1A0208 V_MOV_B32_e32 v14, s9 ; 7E1C0209 V_MOV_B32_e32 v15, s10 ; 7E1E020A V_MOV_B32_e32 v16, s11 ; 7E20020B V_MOV_B32_e32 v17, s12 ; 7E22020C V_MOV_B32_e32 v18, s13 ; 7E24020D V_MOV_B32_e32 v19, s14 ; 7E26020E V_MOV_B32_e32 v20, s15 ; 7E28020F V_MOV_B32_e32 v21, s16 ; 7E2A0210 V_MOV_B32_e32 v22, s17 ; 7E2C0211 V_MOV_B32_e32 v23, s18 ; 7E2E0212 V_MOV_B32_e32 v24, s19 ; 7E300213 V_MOV_B32_e32 v25, s20 ; 7E320214 V_MOV_B32_e32 v26, s21 ; 7E340215 V_MOV_B32_e32 v27, s22 ; 7E360216 V_MOV_B32_e32 v28, s23 ; 7E380217 V_MOV_B32_e32 v29, s24 ; 7E3A0218 V_MOV_B32_e32 v30, s25 ; 7E3C0219 V_MOV_B32_e32 v31, s26 ; 7E3E021A V_MOV_B32_e32 v32, s27 ; 7E40021B V_MOV_B32_e32 v33, s28 ; 7E42021C V_MOV_B32_e32 v34, s29 ; 7E44021D V_MOV_B32_e32 v35, s30 ; 7E46021E V_MOV_B32_e32 v36, s31 ; 7E48021F V_MOV_B32_e32 v37, s32 ; 7E4A0220 V_MOV_B32_e32 v38, s33 ; 7E4C0221 V_MUL_F32_e32 v39, v1, v1 ; 104E0301 V_MAD_F32 v39, v5, v5, v39, 0, 0 ; D2820027 049E0B05 V_MAD_F32 v39, v4, v4, v39, 0, 0 ; D2820027 049E0904 V_MOV_B32_e32 v40, 1.000000e-07 ; 7E5002FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v39, v40 ; 7C0C5127 V_CMP_U_F32_e64 s[0:1], v39, v39, 0, 0 ; D0100000 00024F27 V_CNDMASK_B32_e64 v40, 0, -1, vcc, 0, 0, 0, 0 ; D2000028 01A98280 V_CNDMASK_B32_e64 v41, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000029 00018280 V_OR_B32_e32 v40, v40, v41 ; 38505328 V_MOV_B32_e32 v41, 0x33d6bf95 ; 7E5202FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v40, 0, 0, 0 ; D10A0000 00010128 V_CNDMASK_B32_e64 v39, v41, v39, s[0:1], 0, 0, 0, 0 ; D2000027 00024F29 V_RSQ_CLAMP_F32_e32 v39, v39 ; 7E4E5927 V_MUL_F32_e32 v4, v39, v4 ; 10080927 V_MUL_F32_e32 v5, v39, v5 ; 100A0B27 V_MUL_F32_e32 v1, v39, v1 ; 10020327 EXP 15, 32, 0, 0, 0, v1, v5, v4, v10 ; F800020F 0A040501 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_F32_e32 v1, v3, v11 ; 06021703 V_MUL_F32_e32 v1, v1, v13 ; 10021B01 V_ADD_F32_e32 v4, v0, v12 ; 06081900 V_MUL_F32_e32 v4, v4, v14 ; 10081D04 EXP 15, 33, 0, 0, 0, v6, v7, v4, v1 ; F800021F 01040706 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v1, 1.000000e+00 ; 7E0202F2 EXP 15, 34, 0, 0, 0, v0, v3, v2, v1 ; F800022F 01020300 V_MUL_F32_e32 v4, v3, v33 ; 10084303 V_MAD_F32 v4, v0, v37, v4, 0, 0 ; D2820004 04124B00 V_MAD_F32 v4, v2, v29, v4, 0, 0 ; D2820004 04123B02 V_ADD_F32_e32 v4, v4, v25 ; 06083304 V_MUL_F32_e32 v5, v3, v31 ; 100A3F03 V_MAD_F32 v5, v0, v35, v5, 0, 0 ; D2820005 04164700 V_MAD_F32 v5, v2, v27, v5, 0, 0 ; D2820005 04163702 V_ADD_F32_e32 v5, v5, v23 ; 060A2F05 V_RCP_F32_e32 v6, v5 ; 7E0C5505 V_MUL_F32_e32 v7, v4, v6 ; 100E0D04 V_MUL_F32_e32 v8, v3, v34 ; 10104503 V_MAD_F32 v8, v0, v38, v8, 0, 0 ; D2820008 04224D00 V_MAD_F32 v8, v2, v30, v8, 0, 0 ; D2820008 04223D02 V_ADD_F32_e32 v8, v8, v26 ; 06103508 V_MUL_F32_e32 v6, v8, v6 ; 100C0D08 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v6, v3, v19 ; 100C2703 V_MAD_F32 v6, v0, v21, v6, 0, 0 ; D2820006 041A2B00 V_MAD_F32 v6, v2, v17, v6, 0, 0 ; D2820006 041A2302 V_ADD_F32_e32 v6, v6, v15 ; 060C1F06 V_MAD_F32 v6, v6, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820006 03C5E306 V_MUL_F32_e32 v7, v3, v20 ; 100E2903 V_MAD_F32 v7, v0, v22, v7, 0, 0 ; D2820007 041E2D00 V_MAD_F32 v7, v2, v18, v7, 0, 0 ; D2820007 041E2502 V_ADD_F32_e32 v7, v7, v16 ; 060E2107 V_MAD_F32 v7, v7, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820007 03C5E107 EXP 15, 36, 0, 0, 0, v7, v6, v9, v1 ; F800024F 01090607 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v1, v3, v32 ; 10024103 V_MAD_F32 v0, v0, v36, v1, 0, 0 ; D2820000 04064900 V_MAD_F32 v0, v2, v28, v0, 0, 0 ; D2820000 04023902 V_ADD_F32_e32 v0, v0, v24 ; 06003100 EXP 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xyz, TEMP[5].yzww 47: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %161 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %162 = call float @llvm.pow.f32(float %158, float 1.000000e+00) %163 = fmul float %102, %159 %164 = fmul float %103, %160 %165 = fmul float %104, %161 %166 = fadd float %163, %163 %167 = fadd float %164, %164 %168 = fadd float %165, %165 %169 = fsub float -0.000000e+00, %85 %170 = fadd float %24, %169 %171 = fsub float -0.000000e+00, %86 %172 = fadd float %25, %171 %173 = fsub float -0.000000e+00, %87 %174 = fadd float %26, %173 %175 = fmul float %170, %170 %176 = fmul float %172, %172 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = fcmp uge float %179, 0x3E7AD7F2A0000000 %181 = select i1 %180, float %179, float 0x3E7AD7F2A0000000 %182 = call float @llvm.AMDGPU.rsq.clamped.f32(float %181) %183 = fmul float %182, %170 %184 = fmul float %182, %172 %185 = fmul float %182, %174 %186 = fmul float %170, %182 %187 = fadd float %186, 0x3FAB15B580000000 %188 = fmul float %172, %182 %189 = fadd float %188, 0x3FEB126EA0000000 %190 = fmul float %174, %182 %191 = fadd float %190, 0x3FE0ED9160000000 %192 = fmul float %187, %187 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = fmul float %191, %191 %196 = fadd float %194, %195 %197 = fmul float 0.000000e+00, 0.000000e+00 %198 = fadd float %196, %197 %199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198) %200 = fmul float %187, %199 %201 = fmul float %189, %199 %202 = fmul float %191, %199 %203 = fmul float %200, %78 %204 = fmul float %201, %79 %205 = fadd float %204, %203 %206 = fmul float %202, %80 %207 = fadd float %205, %206 %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %209 = fmul float %78, %183 %210 = fmul float %79, %184 %211 = fadd float %210, %209 %212 = fmul float %80, %185 %213 = fadd float %211, %212 %214 = fadd float %213, %213 %215 = fsub float -0.000000e+00, %183 %216 = fmul float %214, %78 %217 = fadd float %216, %215 %218 = fsub float -0.000000e+00, %184 %219 = fmul float %214, %79 %220 = fadd float %219, %218 %221 = fsub float -0.000000e+00, %185 %222 = fmul float %214, %80 %223 = fadd float %222, %221 %224 = insertelement <4 x float> undef, float %217, i32 0 %225 = insertelement <4 x float> %224, float %220, i32 1 %226 = insertelement <4 x float> %225, float %223, i32 2 %227 = insertelement <4 x float> %226, float %162, i32 3 %228 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %227) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = call float @fabs(float %231) %234 = fdiv float 1.000000e+00, %233 %235 = fmul float %229, %234 %236 = fadd float %235, 1.500000e+00 %237 = fmul float %230, %234 %238 = fadd float %237, 1.500000e+00 %239 = bitcast float %238 to i32 %240 = bitcast float %236 to i32 %241 = bitcast float %232 to i32 %242 = insertelement <4 x i32> undef, i32 %239, i32 0 %243 = insertelement <4 x i32> %242, i32 %240, i32 1 %244 = insertelement <4 x i32> %243, i32 %241, i32 2 %245 = insertelement <4 x i32> %244, i32 undef, i32 3 %246 = bitcast <8 x i32> %55 to <32 x i8> %247 = bitcast <4 x i32> %57 to <16 x i8> %248 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %245, <32 x i8> %246, <16 x i8> %247, i32 4) %249 = extractelement <4 x float> %248, i32 0 %250 = extractelement <4 x float> %248, i32 1 %251 = extractelement <4 x float> %248, i32 2 %252 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %254 = call float @llvm.pow.f32(float %251, float 0x40019999A0000000) %255 = bitcast float %81 to i32 %256 = bitcast float %82 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = bitcast <8 x i32> %43 to <32 x i8> %260 = bitcast <4 x i32> %45 to <16 x i8> %261 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %259, <16 x i8> %260, i32 2) %262 = extractelement <4 x float> %261, i32 0 %263 = extractelement <4 x float> %261, i32 1 %264 = extractelement <4 x float> %261, i32 2 %265 = extractelement <4 x float> %261, i32 3 %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = call float @llvm.pow.f32(float %264, float 0x40019999A0000000) %269 = call float @llvm.pow.f32(float %265, float 1.000000e+00) %270 = call float @fabs(float %269) %271 = call float @llvm.pow.f32(float %270, float 0x40019999A0000000) %272 = insertelement <4 x float> undef, float %217, i32 0 %273 = insertelement <4 x float> %272, float %220, i32 1 %274 = insertelement <4 x float> %273, float %223, i32 2 %275 = insertelement <4 x float> %274, float %223, i32 3 %276 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %275) %277 = extractelement <4 x float> %276, i32 0 %278 = extractelement <4 x float> %276, i32 1 %279 = extractelement <4 x float> %276, i32 2 %280 = extractelement <4 x float> %276, i32 3 %281 = call float @fabs(float %279) %282 = fdiv float 1.000000e+00, %281 %283 = fmul float %277, %282 %284 = fadd float %283, 1.500000e+00 %285 = fmul float %278, %282 %286 = fadd float %285, 1.500000e+00 %287 = bitcast float %286 to i32 %288 = bitcast float %284 to i32 %289 = bitcast float %280 to i32 %290 = insertelement <4 x i32> undef, i32 %287, i32 0 %291 = insertelement <4 x i32> %290, i32 %288, i32 1 %292 = insertelement <4 x i32> %291, i32 %289, i32 2 %293 = insertelement <4 x i32> %292, i32 undef, i32 3 %294 = bitcast <8 x i32> %51 to <32 x i8> %295 = bitcast <4 x i32> %53 to <16 x i8> %296 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 4) %297 = extractelement <4 x float> %296, i32 0 %298 = extractelement <4 x float> %296, i32 1 %299 = extractelement <4 x float> %296, i32 2 %300 = call float @llvm.pow.f32(float %297, float 0x40019999A0000000) %301 = call float @llvm.pow.f32(float %298, float 0x40019999A0000000) %302 = call float @llvm.pow.f32(float %299, float 0x40019999A0000000) %303 = call float @llvm.AMDGPU.lrp(float %271, float %300, float %252) %304 = call float @llvm.AMDGPU.lrp(float %271, float %301, float %253) %305 = call float @llvm.AMDGPU.lrp(float %271, float %302, float %254) %306 = fmul float %271, 1.990000e+02 %307 = fadd float %306, 1.000000e+00 %308 = call float @fabs(float %208) %309 = call float @llvm.pow.f32(float %308, float %307) %310 = fmul float %307, 0x3FB99999A0000000 %311 = fmul float %310, %309 %312 = fadd float %311, %303 %313 = fmul float %310, %309 %314 = fadd float %313, %304 %315 = fmul float %310, %309 %316 = fadd float %315, %305 %317 = fmul float %312, %266 %318 = fadd float %317, %166 %319 = fmul float %314, %267 %320 = fadd float %319, %167 %321 = fmul float %316, %268 %322 = fadd float %321, %168 %323 = call float @fabs(float %318) %324 = call float @llvm.log2.f32(float %323) %325 = call float @fabs(float %320) %326 = call float @llvm.log2.f32(float %325) %327 = call float @fabs(float %322) %328 = call float @llvm.log2.f32(float %327) %329 = fmul float %324, 0x3FDD1743E0000000 %330 = fmul float %326, 0x3FDD1743E0000000 %331 = fmul float %328, 0x3FDD1743E0000000 %332 = call float @llvm.AMDIL.exp.(float %329) %333 = call float @llvm.AMDIL.exp.(float %330) %334 = call float @llvm.AMDIL.exp.(float %331) %335 = bitcast float %332 to i32 %336 = bitcast float %333 to i32 %337 = bitcast float %334 to i32 %338 = insertelement <4 x i32> undef, i32 %335, i32 0 %339 = insertelement <4 x i32> %338, i32 %336, i32 1 %340 = insertelement <4 x i32> %339, i32 %337, i32 2 %341 = insertelement <4 x i32> %340, i32 undef, i32 3 %342 = bitcast <8 x i32> %63 to <32 x i8> %343 = bitcast <4 x i32> %65 to <16 x i8> %344 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %341, <32 x i8> %342, <16 x i8> %343, i32 3) %345 = extractelement <4 x float> %344, i32 0 %346 = extractelement <4 x float> %344, i32 1 %347 = extractelement <4 x float> %344, i32 2 %348 = fmul float %89, 1.000000e+00 %349 = fadd float %348, 0.000000e+00 %350 = fmul float %90, -1.000000e+00 %351 = fadd float %350, 1.000000e+00 %352 = bitcast float %349 to i32 %353 = bitcast float %351 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %67 to <32 x i8> %357 = bitcast <4 x i32> %69 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = extractelement <4 x float> %358, i32 2 %361 = bitcast float %83 to i32 %362 = bitcast float %84 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %71 to <32 x i8> %366 = bitcast <4 x i32> %73 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = extractelement <4 x float> %367, i32 1 %370 = extractelement <4 x float> %367, i32 2 %371 = call float @llvm.AMDGPU.lrp(float %359, float %345, float %368) %372 = call float @llvm.AMDGPU.lrp(float %359, float %346, float %369) %373 = call float @llvm.AMDGPU.lrp(float %359, float %347, float %370) %374 = fsub float -0.000000e+00, %359 %375 = fmul float %360, %374 %376 = fadd float %375, %359 %377 = bitcast float %371 to i32 %378 = bitcast float %372 to i32 %379 = bitcast float %373 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = insertelement <4 x i32> %382, i32 undef, i32 3 %384 = bitcast <8 x i32> %75 to <32 x i8> %385 = bitcast <4 x i32> %77 to <16 x i8> %386 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 3) %387 = extractelement <4 x float> %386, i32 0 %388 = extractelement <4 x float> %386, i32 1 %389 = extractelement <4 x float> %386, i32 2 %390 = call float @llvm.AMDGPU.lrp(float %376, float %387, float %371) %391 = call float @llvm.AMDGPU.lrp(float %376, float %388, float %372) %392 = call float @llvm.AMDGPU.lrp(float %376, float %389, float %373) %393 = fsub float -0.000000e+00, %390 %394 = fadd float %393, %30 %395 = fsub float -0.000000e+00, %391 %396 = fadd float %395, %31 %397 = fsub float -0.000000e+00, %392 %398 = fadd float %397, %32 %399 = fmul float %37, %87 %400 = fsub float -0.000000e+00, %33 %401 = fadd float %85, %400 %402 = fsub float -0.000000e+00, %34 %403 = fadd float %86, %402 %404 = fsub float -0.000000e+00, %35 %405 = fadd float %399, %404 %406 = fmul float %405, %27 %407 = fmul float %406, 0x3FF7154CA0000000 %408 = call float @llvm.AMDIL.exp.(float %407) %409 = fsub float -0.000000e+00, %408 %410 = fadd float %409, 1.000000e+00 %411 = fmul float %401, %401 %412 = fmul float %403, %403 %413 = fadd float %412, %411 %414 = fmul float %405, %405 %415 = fadd float %413, %414 %416 = fdiv float 1.000000e+00, %405 %417 = fmul float %415, %28 %418 = fmul float %410, %417 %419 = fmul float %416, %418 %420 = fmul float %419, 0x3FF7154CA0000000 %421 = call float @llvm.AMDIL.exp.(float %420) %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = fsub float -0.000000e+00, %422 %424 = fadd float %423, 1.000000e+00 %425 = fmul float %88, 5.000000e-01 %426 = fadd float %425, 5.000000e-01 %427 = bitcast float %426 to i32 %428 = bitcast float %29 to i32 %429 = insertelement <2 x i32> undef, i32 %427, i32 0 %430 = insertelement <2 x i32> %429, i32 %428, i32 1 %431 = bitcast <8 x i32> %59 to <32 x i8> %432 = bitcast <4 x i32> %61 to <16 x i8> %433 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 0 %435 = fmul float %424, %434 %436 = fmul float %435, %394 %437 = fadd float %436, %390 %438 = fmul float %435, %396 %439 = fadd float %438, %391 %440 = fmul float %435, %398 %441 = fadd float %440, %392 %442 = fmul float %107, 5.000000e-01 %443 = fsub float -0.000000e+00, %36 %444 = fadd float %443, %87 %445 = fcmp oge float %444, 0.000000e+00 %446 = sext i1 %445 to i32 %447 = bitcast i32 %446 to float %448 = bitcast float %447 to i32 %449 = icmp ne i32 %448, 0 %. = select i1 %449, float %107, float %442 %450 = call i32 @llvm.SI.packf16(float %437, float %439) %451 = bitcast i32 %450 to float %452 = call i32 @llvm.SI.packf16(float %441, float %.) %453 = bitcast i32 %452 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %451, float %453, float %451, float %453) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v26, v0, 1, 1, [m0] ; C8680500 V_INTERP_P2_F32 v26, [v26], v1, 1, 1, [m0] ; C8690501 V_INTERP_P1_F32 v25, v0, 0, 1, [m0] ; C8640400 V_INTERP_P2_F32 v25, [v25], v1, 0, 1, [m0] ; C8650401 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800F00 00640219 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v6, v4 ; 7E0C4F04 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v17, v6 ; 7E224B06 V_LOG_F32_e32 v6, v3 ; 7E0C4F03 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v11, v6 ; 7E164B06 V_LOG_F32_e32 v6, v2 ; 7E0C4F02 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v10, v6 ; 7E144B06 V_LOG_F32_e32 v2, v5 ; 7E044F05 V_MUL_LEGACY_F32_e32 v2, 1.000000e+00, v2 ; 0E0404F2 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MOV_B32_e32 v3, 0x7fffffff ; 7E0602FF 7FFFFFFF V_AND_B32_e32 v2, v2, v3 ; 36040702 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_ADD_F32_e32 v3, -1.000000e-01, v2 ; 060604FF BDCCCCCD V_CMP_LT_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D0020000 00010103 V_CNDMASK_B32_e64 v3, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000003 00018280 V_OR_B32_e32 v3, v3, v3 ; 38060703 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_INTERP_P1_F32 v6, v0, 1, 4, [m0] ; C8181100 V_INTERP_P2_F32 v6, [v6], v1, 1, 4, [m0] ; C8191101 V_INTERP_P1_F32 v8, v0, 0, 4, [m0] ; C8201000 V_INTERP_P2_F32 v8, [v8], v1, 0, 4, [m0] ; C8211001 V_INTERP_P1_F32 v4, v0, 1, 3, [m0] ; C8100D00 V_INTERP_P2_F32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 V_INTERP_P1_F32 v3, v0, 2, 2, [m0] ; C80C0A00 V_INTERP_P2_F32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 V_INTERP_P1_F32 v5, v0, 1, 2, [m0] ; C8140900 V_INTERP_P2_F32 v5, [v5], v1, 1, 2, [m0] ; C8150901 V_INTERP_P1_F32 v7, v0, 0, 2, [m0] ; C81C0800 V_INTERP_P2_F32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 V_INTERP_P1_F32 v16, v0, 3, 1, [m0] ; C8400700 V_INTERP_P2_F32 v16, [v16], v1, 3, 1, [m0] ; C8410701 V_INTERP_P1_F32 v15, v0, 2, 1, [m0] ; C83C0600 V_INTERP_P2_F32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 V_INTERP_P1_F32 v29, v0, 2, 0, [m0] ; C8740200 V_INTERP_P2_F32 v29, [v29], v1, 2, 0, [m0] ; C8750201 V_INTERP_P1_F32 v28, v0, 1, 0, [m0] ; C8700100 V_INTERP_P2_F32 v28, [v28], v1, 1, 0, [m0] ; C8710101 V_INTERP_P1_F32 v27, v0, 0, 0, [m0] ; C86C0000 V_INTERP_P2_F32 v27, [v27], v1, 0, 0, [m0] ; C86D0001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x20 ; C2010920 S_BUFFER_LOAD_DWORD s3, s[8:11], 0x1c ; C201891C S_BUFFER_LOAD_DWORD s12, s[8:11], 0x1a ; C206091A S_BUFFER_LOAD_DWORD s13, s[8:11], 0x19 ; C2068919 S_BUFFER_LOAD_DWORD s14, s[8:11], 0x18 ; C2070918 S_BUFFER_LOAD_DWORD s15, s[8:11], 0x16 ; C2078916 S_BUFFER_LOAD_DWORD s16, s[8:11], 0x15 ; C2080915 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x14 ; C2088914 S_BUFFER_LOAD_DWORD s18, s[8:11], 0x13 ; C2090913 S_BUFFER_LOAD_DWORD s19, s[8:11], 0x11 ; C2098911 S_BUFFER_LOAD_DWORD s20, s[8:11], 0x10 ; C20A0910 S_BUFFER_LOAD_DWORD s21, s[8:11], 0xe ; C20A890E S_BUFFER_LOAD_DWORD s22, s[8:11], 0xd ; C20B090D S_BUFFER_LOAD_DWORD s8, s[8:11], 0xc ; C204090C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v18, s2 ; 7E240202 V_MOV_B32_e32 v0, s3 ; 7E000203 V_MOV_B32_e32 v19, s12 ; 7E26020C V_MOV_B32_e32 v23, s13 ; 7E2E020D V_MOV_B32_e32 v22, s14 ; 7E2C020E V_MOV_B32_e32 v1, s15 ; 7E02020F V_MOV_B32_e32 v24, s16 ; 7E300210 V_MOV_B32_e32 v12, s17 ; 7E180211 V_MOV_B32_e32 v14, s18 ; 7E1C0212 V_MOV_B32_e32 v21, s19 ; 7E2A0213 V_MOV_B32_e32 v20, s20 ; 7E280214 V_MOV_B32_e32 v31, s21 ; 7E3E0215 V_MOV_B32_e32 v33, s22 ; 7E420216 V_MOV_B32_e32 v32, s8 ; 7E400208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_MOV_B32_e32 v30, 0.000000e+00 ; 7E3C0280 V_CUBESC_F32 v35, v27, v28, v29, 0, 0 ; D28A0023 0476391B V_CUBETC_F32 v34, v27, v28, v29, 0, 0 ; D28C0022 0476391B V_CUBEMA_F32 v36, v27, v28, v29, 0, 0 ; D28E0024 0476391B V_CUBEID_F32 v37, v27, v28, v29, 0, 0 ; D2880025 0476391B V_MOV_B32_e32 v42, 0x7fffffff ; 7E5402FF 7FFFFFFF V_AND_B32_e32 v42, v36, v42 ; 36545524 V_RCP_F32_e32 v42, v42 ; 7E54552A V_MOV_B32_e32 v43, 1.500000e+00 ; 7E5602FF 3FC00000 V_MAD_F32 v36, v34, v42, v43, 0, 0 ; D2820024 04AE5522 V_MAD_F32 v35, v35, v42, v43, 0, 0 ; D2820023 04AE5523 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[8:15], s[0:3] ; F0800F00 00022223 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v38, v37 ; 7E4C4F25 V_MUL_LEGACY_F32_e32 v38, 1.000000e+00, v38 ; 0E4C4CF2 V_EXP_F32_e32 v41, v38 ; 7E524B26 V_SUB_F32_e32 v33, v33, v5 ; 08420B21 V_SUB_F32_e32 v32, v32, v7 ; 08400F20 V_MUL_F32_e32 v42, v32, v32 ; 10544120 V_MAD_F32 v42, v33, v33, v42, 0, 0 ; D282002A 04AA4321 V_SUB_F32_e32 v31, v31, v3 ; 083E071F V_MAD_F32 v42, v31, v31, v42, 0, 0 ; D282002A 04AA3F1F V_MOV_B32_e32 v44, 1.000000e-07 ; 7E5802FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v42, v44 ; 7C0C592A V_CMP_U_F32_e64 s[0:1], v42, v42, 0, 0 ; D0100000 0002552A V_CNDMASK_B32_e64 v44, 0, -1, vcc, 0, 0, 0, 0 ; D200002C 01A98280 V_CNDMASK_B32_e64 v45, 0, -1, s[0:1], 0, 0, 0, 0 ; D200002D 00018280 V_OR_B32_e32 v44, v44, v45 ; 38585B2C V_MOV_B32_e32 v45, 0x33d6bf95 ; 7E5A02FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v44, 0, 0, 0 ; D10A0000 0001012C V_CNDMASK_B32_e64 v42, v45, v42, s[0:1], 0, 0, 0, 0 ; D200002A 0002552D V_RSQ_CLAMP_F32_e32 v42, v42 ; 7E54592A V_MUL_F32_e32 v44, v33, v42 ; 10585521 V_MUL_F32_e32 v45, v32, v42 ; 105A5520 V_MUL_F32_e32 v46, v27, v45 ; 105C5B1B V_MAD_F32 v46, v28, v44, v46, 0, 0 ; D282002E 04BA591C V_MUL_F32_e32 v47, v31, v42 ; 105E551F V_MAD_F32 v46, v29, v47, v46, 0, 0 ; D282002E 04BA5F1D V_ADD_F32_e32 v46, v46, v46 ; 065C5D2E V_MUL_F32_e32 v48, v46, v29 ; 10603B2E V_SUB_F32_e32 v40, v48, v47 ; 08505F30 V_MUL_F32_e32 v47, v46, v28 ; 105E392E V_SUB_F32_e32 v39, v47, v44 ; 084E592F V_MUL_F32_e32 v44, v46, v27 ; 1058372E V_SUB_F32_e32 v38, v44, v45 ; 084C5B2C V_CUBESC_F32 v45, v38, v39, v40, 0, 0 ; D28A002D 04A24F26 V_CUBETC_F32 v44, v38, v39, v40, 0, 0 ; D28C002C 04A24F26 V_CUBEMA_F32 v46, v38, v39, v40, 0, 0 ; D28E002E 04A24F26 V_CUBEID_F32 v47, v38, v39, v40, 0, 0 ; D288002F 04A24F26 V_MOV_B32_e32 v52, 0x7fffffff ; 7E6802FF 7FFFFFFF V_AND_B32_e32 v52, v46, v52 ; 3668692E V_RCP_F32_e32 v52, v52 ; 7E685534 V_MAD_F32 v46, v44, v52, v43, 0, 0 ; D282002E 04AE692C V_MAD_F32 v45, v45, v52, v43, 0, 0 ; D282002D 04AE692D S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x20 ; C0C40720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[44:46], 7, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[8:15], s[0:3] ; F0800700 00022C2D S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v47, v46 ; 7E5E4F2E V_MUL_LEGACY_F32_e32 v47, 2.200000e+00, v47 ; 0E5E5EFF 400CCCCD V_EXP_F32_e32 v47, v47 ; 7E5E4B2F S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[8:15], s[0:3] ; F0800F00 00023019 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v25, v51 ; 7E324F33 V_MUL_LEGACY_F32_e32 v25, 1.000000e+00, v25 ; 0E3232F2 V_EXP_F32_e32 v25, v25 ; 7E324B19 V_MOV_B32_e32 v26, 0x7fffffff ; 7E3402FF 7FFFFFFF V_AND_B32_e32 v25, v25, v26 ; 36323519 V_LOG_F32_e32 v25, v25 ; 7E324F19 V_MUL_LEGACY_F32_e32 v25, 2.200000e+00, v25 ; 0E3232FF 400CCCCD V_EXP_F32_e32 v25, v25 ; 7E324B19 V_SUB_F32_e32 v26, 1.000000e+00, v25 ; 083432F2 V_MUL_F32_e32 v47, v26, v47 ; 105E5F1A V_MOV_B32_e32 v41, v40 ; 7E520328 V_CUBESC_F32 v53, v38, v39, v40, 0, 0 ; D28A0035 04A24F26 V_CUBETC_F32 v52, v38, v39, v40, 0, 0 ; D28C0034 04A24F26 V_CUBEMA_F32 v54, v38, v39, v40, 0, 0 ; D28E0036 04A24F26 V_CUBEID_F32 v55, v38, v39, v40, 0, 0 ; D2880037 04A24F26 V_MOV_B32_e32 v38, 0x7fffffff ; 7E4C02FF 7FFFFFFF V_AND_B32_e32 v38, v54, v38 ; 364C4D36 V_RCP_F32_e32 v38, v38 ; 7E4C5526 V_MAD_F32 v54, v52, v38, v43, 0, 0 ; D2820036 04AE4D34 V_MAD_F32 v53, v53, v38, v43, 0, 0 ; D2820035 04AE4D35 S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[8:15], s[0:3] ; F0800700 00022635 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v41, v40 ; 7E524F28 V_MUL_LEGACY_F32_e32 v41, 2.200000e+00, v41 ; 0E5252FF 400CCCCD V_EXP_F32_e32 v41, v41 ; 7E524B29 V_MAD_F32 v41, v25, v41, v47, 0, 0 ; D2820029 04BE5319 V_MOV_B32_e32 v43, 5.290000e-02 ; 7E5602FF 3D58ADAC V_MAD_F32 v32, v32, v42, v43, 0, 0 ; D2820020 04AE5520 V_MOV_B32_e32 v43, 8.460000e-01 ; 7E5602FF 3F589375 V_MAD_F32 v33, v33, v42, v43, 0, 0 ; D2820021 04AE5521 V_MUL_F32_e32 v43, v33, v33 ; 10564321 V_MAD_F32 v43, v32, v32, v43, 0, 0 ; D282002B 04AE4120 V_MOV_B32_e32 v47, 5.290000e-01 ; 7E5E02FF 3F076C8B V_MAD_F32 v31, v31, v42, v47, 0, 0 ; D282001F 04BE551F V_MAD_F32 v42, v31, v31, v43, 0, 0 ; D282002A 04AE3F1F V_RSQ_CLAMP_F32_e32 v42, v42 ; 7E54592A V_MUL_F32_e32 v33, v33, v42 ; 10425521 V_MUL_F32_e32 v32, v32, v42 ; 10405520 V_MUL_F32_e32 v32, v32, v27 ; 10403720 V_MAD_F32 v32, v33, v28, v32, 0, 0 ; D2820020 04823921 V_MUL_F32_e32 v31, v31, v42 ; 103E551F V_MAD_F32 v27, v31, v29, v32, 0, 0 ; D282001B 04823B1F V_ADD_F32_e64 v27, v27, 0, 1, 0 ; D206081B 0001011B V_MOV_B32_e32 v28, 0x7fffffff ; 7E3802FF 7FFFFFFF V_AND_B32_e32 v27, v27, v28 ; 3636391B V_LOG_F32_e32 v27, v27 ; 7E364F1B V_MOV_B32_e32 v28, 1.990000e+02 ; 7E3802FF 43470000 V_MAD_F32 v28, v25, v28, 1.000000e+00, 0, 0 ; D282001C 03CA3919 V_MUL_LEGACY_F32_e32 v27, v28, v27 ; 0E36371C V_EXP_F32_e32 v27, v27 ; 7E364B1B V_MUL_F32_e32 v28, 1.000000e-01, v28 ; 103838FF 3DCCCCCD V_MAD_F32 v29, v28, v27, v41, 0, 0 ; D282001D 04A6371C V_LOG_F32_e32 v30, v36 ; 7E3C4F24 V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MUL_F32_e32 v31, v17, v30 ; 103E3D11 V_MAD_F32 v17, v17, v30, v31, 0, 0 ; D2820011 047E3D11 V_LOG_F32_e32 v30, v50 ; 7E3C4F32 V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MAD_F32 v17, v29, v30, v17, 0, 0 ; D2820011 04463D1D V_MOV_B32_e32 v29, 0x7fffffff ; 7E3A02FF 7FFFFFFF V_AND_B32_e32 v17, v17, v29 ; 36223B11 V_LOG_F32_e32 v17, v17 ; 7E224F11 V_MUL_F32_e32 v17, 4.545450e-01, v17 ; 102222FF 3EE8BA1F V_EXP_F32_e32 v31, v17 ; 7E3E4B11 V_LOG_F32_e32 v17, v45 ; 7E224F2D V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v17, v26, v17 ; 1022231A V_LOG_F32_e32 v33, v39 ; 7E424F27 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MAD_F32 v17, v25, v33, v17, 0, 0 ; D2820011 04464319 V_MAD_F32 v17, v28, v27, v17, 0, 0 ; D2820011 0446371C V_LOG_F32_e32 v33, v35 ; 7E424F23 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MUL_F32_e32 v41, v11, v33 ; 1052430B V_MAD_F32 v11, v11, v33, v41, 0, 0 ; D282000B 04A6430B V_LOG_F32_e32 v33, v49 ; 7E424F31 V_MUL_LEGACY_F32_e32 v33, 2.200000e+00, v33 ; 0E4242FF 400CCCCD V_EXP_F32_e32 v33, v33 ; 7E424B21 V_MAD_F32 v11, v17, v33, v11, 0, 0 ; D282000B 042E4311 V_MOV_B32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF V_AND_B32_e32 v11, v11, v17 ; 3616230B V_LOG_F32_e32 v11, v11 ; 7E164F0B V_MUL_F32_e32 v11, 4.545450e-01, v11 ; 101616FF 3EE8BA1F V_EXP_F32_e32 v30, v11 ; 7E3C4B0B V_LOG_F32_e32 v11, v44 ; 7E164F2C V_MUL_LEGACY_F32_e32 v11, 2.200000e+00, v11 ; 0E1616FF 400CCCCD V_EXP_F32_e32 v11, v11 ; 7E164B0B V_MUL_F32_e32 v11, v26, v11 ; 1016171A V_LOG_F32_e32 v17, v38 ; 7E224F26 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MAD_F32 v11, v25, v17, v11, 0, 0 ; D282000B 042E2319 V_MAD_F32 v11, v28, v27, v11, 0, 0 ; D282000B 042E371C V_LOG_F32_e32 v17, v34 ; 7E224F22 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v25, v10, v17 ; 1032230A V_MAD_F32 v10, v10, v17, v25, 0, 0 ; D282000A 0466230A V_LOG_F32_e32 v17, v48 ; 7E224F30 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MAD_F32 v10, v11, v17, v10, 0, 0 ; D282000A 042A230B V_MOV_B32_e32 v11, 0x7fffffff ; 7E1602FF 7FFFFFFF V_AND_B32_e32 v10, v10, v11 ; 3614170A V_LOG_F32_e32 v10, v10 ; 7E144F0A V_MUL_F32_e32 v10, 4.545450e-01, v10 ; 101414FF 3EE8BA1F V_EXP_F32_e32 v29, v10 ; 7E3A4B0A S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x30 ; C0C40730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[8:15], s[0:3] ; F0800700 0002191D S_LOAD_DWORDX4 s[0:3], s[4:5], 0x20 ; C0800520 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x40 ; C0C40740 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[8:15], s[0:3] ; F0800700 00020F0F V_SUB_F32_e32 v9, 1.000000e+00, v6 ; 08120CF2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x38 ; C0C40738 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[8:9], 5, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800500 00020808 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v6, 1.000000e+00, v8 ; 080C10F2 V_MUL_F32_e32 v10, v6, v17 ; 10142306 V_MAD_F32 v30, v8, v27, v10, 0, 0 ; D282001E 042A3708 V_MUL_F32_e32 v10, v6, v16 ; 10142106 V_MAD_F32 v29, v8, v26, v10, 0, 0 ; D282001D 042A3508 V_MUL_F32_e32 v6, v6, v15 ; 100C1F06 V_MAD_F32 v28, v8, v25, v6, 0, 0 ; D282001C 041A3308 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x24 ; C0800524 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x48 ; C0C40748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[0:3] ; F0800700 00020F1C V_MUL_F32_e32 v6, v9, v8 ; 100C1109 V_SUB_F32_e32 v6, v8, v6 ; 080C0D08 V_SUB_F32_e32 v8, 1.000000e+00, v6 ; 08100CF2 V_MUL_F32_e32 v9, v8, v29 ; 10123B08 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v9, v6, v16, v9, 0, 0 ; D2820009 04262106 V_SUB_F32_e32 v10, v24, v9 ; 08141318 V_SUB_F32_e32 v5, v5, v23 ; 080A2F05 V_SUB_F32_e32 v7, v7, v22 ; 080E2D07 V_MUL_F32_e32 v7, v7, v7 ; 100E0F07 V_MAD_F32 v5, v5, v5, v7, 0, 0 ; D2820005 041E0B05 V_MUL_F32_e32 v7, v18, v3 ; 100E0712 V_SUB_F32_e32 v7, v7, v19 ; 080E2707 V_MAD_F32 v5, v7, v7, v5, 0, 0 ; D2820005 04160F07 V_MUL_F32_e32 v5, v5, v21 ; 100A2B05 V_MUL_F32_e32 v11, v7, v20 ; 10162907 V_MUL_F32_e32 v11, 1.442700e+00, v11 ; 101616FF 3FB8AA65 V_EXP_F32_e32 v11, v11 ; 7E164B0B V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_MUL_F32_e32 v5, v11, v5 ; 100A0B0B V_RCP_F32_e32 v7, v7 ; 7E0E5507 V_MUL_F32_e32 v5, v7, v5 ; 100A0B07 V_MUL_F32_e32 v5, 1.442700e+00, v5 ; 100A0AFF 3FB8AA65 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_ADD_F32_e64 v5, v5, 0, 1, 0 ; D2060805 00010105 V_SUB_F32_e32 v5, 1.000000e+00, v5 ; 080A0AF2 V_MAD_F32 v13, v4, 5.000000e-01, 5.000000e-01, 0, 0 ; D282000D 03C1E104 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x28 ; C0C20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v4, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800100 0001040D S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v5, v4 ; 10080905 V_MAD_F32 v5, v4, v10, v9, 0, 0 ; D2820005 04261504 V_MUL_F32_e32 v7, v8, v28 ; 100E3908 V_MAD_F32 v7, v6, v15, v7, 0, 0 ; D2820007 041E1F06 V_SUB_F32_e32 v9, v12, v7 ; 08120F0C V_MAD_F32 v7, v4, v9, v7, 0, 0 ; D2820007 041E1304 V_CVT_PKRTZ_F16_F32_e32 v5, v7, v5 ; 5E0A0B07 V_MUL_F32_e32 v7, v8, v30 ; 100E3D08 V_MAD_F32 v6, v6, v17, v7, 0, 0 ; D2820006 041E2306 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_MAD_F32 v1, v4, v1, v6, 0, 0 ; D2820001 041A0304 V_SUB_F32_e32 v0, v3, v0 ; 08000103 V_CMP_GE_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D00C0000 00010100 V_MUL_F32_e32 v0, 5.000000e-01, v2 ; 100004F0 V_CNDMASK_B32_e64 v0, v0, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020500 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 EXP 15, 0, 1, 1, 1, v5, v0, v5, v0 ; F8001C0F 00050005 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %429, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %503, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %505, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %507, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %566, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %568, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %570, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = fcmp uge float %399, 0x3E7AD7F2A0000000 %401 = select i1 %400, float %399, float 0x3E7AD7F2A0000000 %402 = call float @llvm.AMDGPU.rsq.clamped.f32(float %401) %403 = fmul float %402, %temp12.0 %404 = fmul float %402, %temp13.0 %405 = fmul float %402, %temp14.0 %406 = fdiv float 1.000000e+00, %394 %407 = fmul float %391, %406 %408 = fmul float %392, %406 %409 = fadd float %temp8.0, %39 %410 = fadd float %temp9.0, %40 %411 = fmul float %409, %37 %412 = fmul float %410, %38 %413 = fmul float %temp9.0, %31 %414 = fmul float %temp9.0, %32 %415 = fmul float %temp8.0, %29 %416 = fadd float %415, %413 %417 = fmul float %temp8.0, %30 %418 = fadd float %417, %414 %419 = fmul float %temp10.0, %33 %420 = fadd float %419, %416 %421 = fmul float %temp10.0, %34 %422 = fadd float %421, %418 %423 = fadd float %420, %35 %424 = fadd float %422, %36 %425 = fmul float %423, 5.000000e-01 %426 = fadd float %425, -5.000000e-01 %427 = fmul float %424, -5.000000e-01 %428 = fadd float %427, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %403, float %404, float %405, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %411, float %412) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %407, float %408, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %426, float %428, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %429 = fmul float 3.000000e+00, %61 %430 = fptosi float %429 to i32 %431 = bitcast i32 %430 to float %432 = bitcast float %431 to i32 %433 = shl i32 %432, 4 %434 = add i32 %433, 144 %435 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %434) %436 = shl i32 %432, 4 %437 = add i32 %436, 148 %438 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %437) %439 = shl i32 %432, 4 %440 = add i32 %439, 152 %441 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %440) %442 = shl i32 %432, 4 %443 = add i32 %442, 156 %444 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %443) %445 = fmul float %77, %435 %446 = fmul float %79, %438 %447 = fadd float %445, %446 %448 = fmul float %81, %441 %449 = fadd float %447, %448 %450 = fmul float %83, %444 %451 = fadd float %449, %450 %452 = fptosi float %429 to i32 %453 = bitcast i32 %452 to float %454 = bitcast float %453 to i32 %455 = add i32 1, %454 %456 = bitcast i32 %455 to float %457 = bitcast float %456 to i32 %458 = shl i32 %457, 4 %459 = add i32 %458, 144 %460 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %459) %461 = shl i32 %457, 4 %462 = add i32 %461, 148 %463 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %462) %464 = shl i32 %457, 4 %465 = add i32 %464, 152 %466 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %465) %467 = shl i32 %457, 4 %468 = add i32 %467, 156 %469 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %468) %470 = fmul float %77, %460 %471 = fmul float %79, %463 %472 = fadd float %470, %471 %473 = fmul float %81, %466 %474 = fadd float %472, %473 %475 = fmul float %83, %469 %476 = fadd float %474, %475 %477 = fptosi float %429 to i32 %478 = bitcast i32 %477 to float %479 = bitcast float %478 to i32 %480 = add i32 2, %479 %481 = bitcast i32 %480 to float %482 = bitcast float %481 to i32 %483 = shl i32 %482, 4 %484 = add i32 %483, 144 %485 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %484) %486 = shl i32 %482, 4 %487 = add i32 %486, 148 %488 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %487) %489 = shl i32 %482, 4 %490 = add i32 %489, 152 %491 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %490) %492 = shl i32 %482, 4 %493 = add i32 %492, 156 %494 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %493) %495 = fmul float %77, %485 %496 = fmul float %79, %488 %497 = fadd float %495, %496 %498 = fmul float %81, %491 %499 = fadd float %497, %498 %500 = fmul float %83, %494 %501 = fadd float %499, %500 %502 = fmul float %54, %451 %503 = fadd float %502, %298 %504 = fmul float %54, %476 %505 = fadd float %504, %300 %506 = fmul float %54, %501 %507 = fadd float %506, %302 %508 = fptosi float %429 to i32 %509 = bitcast i32 %508 to float %510 = bitcast float %509 to i32 %511 = shl i32 %510, 4 %512 = add i32 %511, 144 %513 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %512) %514 = shl i32 %510, 4 %515 = add i32 %514, 148 %516 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %515) %517 = shl i32 %510, 4 %518 = add i32 %517, 152 %519 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %518) %520 = fmul float %66, %513 %521 = fmul float %67, %516 %522 = fadd float %521, %520 %523 = fmul float %68, %519 %524 = fadd float %522, %523 %525 = fptosi float %429 to i32 %526 = bitcast i32 %525 to float %527 = bitcast float %526 to i32 %528 = add i32 1, %527 %529 = bitcast i32 %528 to float %530 = bitcast float %529 to i32 %531 = shl i32 %530, 4 %532 = add i32 %531, 144 %533 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %532) %534 = shl i32 %530, 4 %535 = add i32 %534, 148 %536 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %535) %537 = shl i32 %530, 4 %538 = add i32 %537, 152 %539 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %538) %540 = fmul float %66, %533 %541 = fmul float %67, %536 %542 = fadd float %541, %540 %543 = fmul float %68, %539 %544 = fadd float %542, %543 %545 = fptosi float %429 to i32 %546 = bitcast i32 %545 to float %547 = bitcast float %546 to i32 %548 = add i32 2, %547 %549 = bitcast i32 %548 to float %550 = bitcast float %549 to i32 %551 = shl i32 %550, 4 %552 = add i32 %551, 144 %553 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %552) %554 = shl i32 %550, 4 %555 = add i32 %554, 148 %556 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %555) %557 = shl i32 %550, 4 %558 = add i32 %557, 152 %559 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %558) %560 = fmul float %66, %553 %561 = fmul float %67, %556 %562 = fadd float %561, %560 %563 = fmul float %68, %559 %564 = fadd float %562, %563 %565 = fmul float %54, %524 %566 = fadd float %565, %361 %567 = fmul float %54, %544 %568 = fadd float %567, %363 %569 = fmul float %54, %564 %570 = fadd float %569, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v6, s10, v0 ; 4A0C000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[19:22], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011306 V_MOV_B32_e32 v0, 3.000000e+00 ; 7E0002FF 40400000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v19, v0 ; 10000113 V_CVT_I32_F32_e32 v0, v0 ; 7E001100 V_LSHLREV_B32_e32 v2, 4, v0 ; 34040084 V_ADD_I32_e32 v1, 0x90, v2 ; 4A0204FF 00000090 S_LOAD_DWORDX4 s[4:7], s[2:3], 0x0 ; C0820300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[4:7] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010301 S_LOAD_DWORDX4 s[0:3], s[8:9], 0xc ; C080090C S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[15:18], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000F06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v15, v3 ; 1002070F V_ADD_I32_e32 v4, 0x94, v2 ; 4A0804FF 00000094 BUFFER_LOAD_DWORD v7, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010704 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v16, v7, v1, 0, 0 ; D2820001 04060F10 V_ADD_I32_e32 v4, 0x98, v2 ; 4A0804FF 00000098 BUFFER_LOAD_DWORD v8, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010804 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v17, v8, v1, 0, 0 ; D2820001 04061111 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[11:14], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000B06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v1, v11 ; 10021701 V_ADD_I32_e32 v4, 2, v0 ; 4A080082 V_LSHLREV_B32_e32 v9, 4, v4 ; 34120884 V_ADD_I32_e32 v4, 0x90, v9 ; 4A0812FF 00000090 BUFFER_LOAD_DWORD v10, s[4:7] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010A04 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v15, v10 ; 1008150F V_ADD_I32_e32 v5, 0x94, v9 ; 4A0A12FF 00000094 BUFFER_LOAD_DWORD v27, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011B05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v16, v27, v4, 0, 0 ; D2820004 04123710 V_ADD_I32_e32 v5, 0x98, v9 ; 4A0A12FF 00000098 BUFFER_LOAD_DWORD v28, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011C05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v4, v17, v28, v4, 0, 0 ; D2820004 04123911 V_MUL_F32_e32 v4, v4, v11 ; 10081704 V_ADD_I32_e32 v0, 1, v0 ; 4A000081 V_LSHLREV_B32_e32 v29, 4, v0 ; 343A0084 V_ADD_I32_e32 v0, 0x90, v29 ; 4A003AFF 00000090 BUFFER_LOAD_DWORD v30, s[4:7] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E00 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v15, v30 ; 10003D0F V_ADD_I32_e32 v5, 0x94, v29 ; 4A0A3AFF 00000094 BUFFER_LOAD_DWORD v31, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F05 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v16, v31, v0, 0, 0 ; D2820000 04023F10 V_ADD_I32_e32 v5, 0x98, v29 ; 4A0A3AFF 00000098 BUFFER_LOAD_DWORD v32, s[4:7] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012005 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v17, v32, v0, 0, 0 ; D2820000 04024111 V_MUL_F32_e32 v5, v0, v11 ; 100A1700 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[23:26], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80001706 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v24, v7 ; 10000F18 V_MAD_F32 v0, v23, v3, v0, 0, 0 ; D2820000 04020717 V_MAD_F32 v0, v25, v8, v0, 0, 0 ; D2820000 04021119 V_ADD_I32_e32 v2, 0x9c, v2 ; 4A0404FF 0000009C BUFFER_LOAD_DWORD v2, s[4:7] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010202 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v2 ; 06000500 V_MUL_F32_e32 v0, v0, v11 ; 10001700 V_MUL_F32_e32 v2, v24, v27 ; 10043718 V_MAD_F32 v2, v23, v10, v2, 0, 0 ; D2820002 040A1517 V_MAD_F32 v2, v25, v28, v2, 0, 0 ; D2820002 040A3919 V_ADD_I32_e32 v3, 0x9c, v9 ; 4A0612FF 0000009C BUFFER_LOAD_DWORD v3, s[4:7] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010303 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v2, v2, v3 ; 06040702 V_MUL_F32_e32 v2, v2, v11 ; 10041702 V_MUL_F32_e32 v3, v24, v31 ; 10063F18 V_MAD_F32 v3, v23, v30, v3, 0, 0 ; D2820003 040E3D17 V_MAD_F32 v3, v25, v32, v3, 0, 0 ; D2820003 040E4119 V_ADD_I32_e32 v7, 0x9c, v29 ; 4A0E3AFF 0000009C BUFFER_LOAD_DWORD v7, s[4:7] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80010707 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v3, v3, v7 ; 06060F03 V_MUL_F32_e32 v3, v3, v11 ; 10061703 S_LOAD_DWORDX4 s[0:3], s[8:9], 0x10 ; C0800910 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[0:3][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000606 V_CMP_GT_F32_e64 s[0:1], v12, 0.000000e+00, 0, 0 ; D0080000 0001010C V_MOV_B32_e32 v10, 0.000000e+00 ; 7E140280 V_MOV_B32_e32 v27, 1.000000e+00 ; 7E3602F2 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_MOV_B32_e32 v10, 3.000000e+00 ; 7E1402FF 40400000 V_MUL_F32_e32 v10, v20, v10 ; 10141514 V_CVT_I32_F32_e32 v28, v10 ; 7E38110A V_LSHLREV_B32_e32 v29, 4, v28 ; 343A3884 V_ADD_I32_e32 v30, 0x94, v29 ; 4A3C3AFF 00000094 BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E V_ADD_I32_e32 v31, 0x90, v29 ; 4A3E3AFF 00000090 BUFFER_LOAD_DWORD v31, s[4:7] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F1F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v32, v15, v31 ; 10403F0F V_MAD_F32 v32, v16, v30, v32, 0, 0 ; D2820020 04823D10 V_ADD_I32_e32 v33, 0x98, v29 ; 4A423AFF 00000098 BUFFER_LOAD_DWORD v33, s[4:7] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012121 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v32, v17, v33, v32, 0, 0 ; D2820020 04824311 V_MAD_F32 v1, v12, v32, v1, 0, 0 ; D2820001 0406410C V_ADD_I32_e32 v32, 2, v28 ; 4A403882 V_LSHLREV_B32_e32 v32, 4, v32 ; 34404084 V_ADD_I32_e32 v34, 0x94, v32 ; 4A4440FF 00000094 BUFFER_LOAD_DWORD v34, s[4:7] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012222 V_ADD_I32_e32 v35, 0x90, v32 ; 4A4640FF 00000090 BUFFER_LOAD_DWORD v35, s[4:7] + v35 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012323 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v36, v15, v35 ; 1048470F V_MAD_F32 v36, v16, v34, v36, 0, 0 ; D2820024 04924510 V_ADD_I32_e32 v37, 0x98, v32 ; 4A4A40FF 00000098 BUFFER_LOAD_DWORD v37, s[4:7] + v37 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012525 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v36, v17, v37, v36, 0, 0 ; D2820024 04924B11 V_MAD_F32 v4, v12, v36, v4, 0, 0 ; D2820004 0412490C V_ADD_I32_e32 v28, 1, v28 ; 4A383881 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v36, 0x94, v28 ; 4A4838FF 00000094 BUFFER_LOAD_DWORD v36, s[4:7] + v36 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012424 V_ADD_I32_e32 v38, 0x90, v28 ; 4A4C38FF 00000090 BUFFER_LOAD_DWORD v38, s[4:7] + v38 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012626 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v39, v15, v38 ; 104E4D0F V_MAD_F32 v39, v16, v36, v39, 0, 0 ; D2820027 049E4910 V_ADD_I32_e32 v40, 0x98, v28 ; 4A5038FF 00000098 BUFFER_LOAD_DWORD v40, s[4:7] + v40 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012828 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v39, v17, v40, v39, 0, 0 ; D2820027 049E5111 V_MAD_F32 v5, v12, v39, v5, 0, 0 ; D2820005 04164F0C V_MUL_F32_e32 v30, v24, v30 ; 103C3D18 V_MAD_F32 v30, v23, v31, v30, 0, 0 ; D282001E 047A3F17 V_MAD_F32 v30, v25, v33, v30, 0, 0 ; D282001E 047A4319 V_ADD_I32_e32 v29, 0x9c, v29 ; 4A3A3AFF 0000009C BUFFER_LOAD_DWORD v29, s[4:7] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011D1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v27, v29, v30, 0, 0 ; D282001D 047A3B1B V_MAD_F32 v0, v12, v29, v0, 0, 0 ; D2820000 04023B0C V_MUL_F32_e32 v29, v24, v34 ; 103A4518 V_MAD_F32 v29, v23, v35, v29, 0, 0 ; D282001D 04764717 V_MAD_F32 v29, v25, v37, v29, 0, 0 ; D282001D 04764B19 V_ADD_I32_e32 v30, 0x9c, v32 ; 4A3C40FF 0000009C BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v29, v27, v30, v29, 0, 0 ; D282001D 04763D1B V_MAD_F32 v2, v12, v29, v2, 0, 0 ; D2820002 040A3B0C V_MUL_F32_e32 v29, v24, v36 ; 103A4918 V_MAD_F32 v29, v23, v38, v29, 0, 0 ; D282001D 04764D17 V_MAD_F32 v29, v25, v40, v29, 0, 0 ; D282001D 04765119 V_ADD_I32_e32 v28, 0x9c, v28 ; 4A3838FF 0000009C BUFFER_LOAD_DWORD v28, s[4:7] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011C1C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v27, v28, v29, 0, 0 ; D282001C 0476391B V_MAD_F32 v3, v12, v28, v3, 0, 0 ; D2820003 040E390C V_CMP_GT_F32_e64 s[2:3], v13, 0.000000e+00, 0, 0 ; D0080002 0001010D S_AND_SAVEEXEC_B64 s[2:3], s[2:3] ; BE822402 S_XOR_B64 s[2:3], exec, s[2:3] ; 8982027E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_MOV_B32_e32 v10, 3.000000e+00 ; 7E1402FF 40400000 V_MUL_F32_e32 v10, v21, v10 ; 10141515 V_CVT_I32_F32_e32 v19, v10 ; 7E26110A V_LSHLREV_B32_e32 v20, 4, v19 ; 34282684 V_ADD_I32_e32 v21, 0x94, v20 ; 4A2A28FF 00000094 BUFFER_LOAD_DWORD v21, s[4:7] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011515 V_ADD_I32_e32 v22, 0x90, v20 ; 4A2C28FF 00000090 BUFFER_LOAD_DWORD v22, s[4:7] + v22 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011616 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MUL_F32_e32 v28, v15, v22 ; 10382D0F V_MAD_F32 v28, v16, v21, v28, 0, 0 ; D282001C 04722B10 V_ADD_I32_e32 v29, 0x98, v20 ; 4A3A28FF 00000098 BUFFER_LOAD_DWORD v29, s[4:7] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011D1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v17, v29, v28, 0, 0 ; D282001C 04723B11 V_MAD_F32 v1, v13, v28, v1, 0, 0 ; D2820001 0406390D V_ADD_I32_e32 v28, 2, v19 ; 4A382682 V_LSHLREV_B32_e32 v28, 4, v28 ; 34383884 V_ADD_I32_e32 v30, 0x94, v28 ; 4A3C38FF 00000094 BUFFER_LOAD_DWORD v30, s[4:7] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011E1E V_ADD_I32_e32 v31, 0x90, v28 ; 4A3E38FF 00000090 BUFFER_LOAD_DWORD v31, s[4:7] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011F1F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v32, v15, v31 ; 10403F0F V_MAD_F32 v32, v16, v30, v32, 0, 0 ; D2820020 04823D10 V_ADD_I32_e32 v33, 0x98, v28 ; 4A4238FF 00000098 BUFFER_LOAD_DWORD v33, s[4:7] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012121 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v32, v17, v33, v32, 0, 0 ; D2820020 04824311 V_MAD_F32 v4, v13, v32, v4, 0, 0 ; D2820004 0412410D V_ADD_I32_e32 v19, 1, v19 ; 4A262681 V_LSHLREV_B32_e32 v19, 4, v19 ; 34262684 V_ADD_I32_e32 v32, 0x94, v19 ; 4A4026FF 00000094 BUFFER_LOAD_DWORD v32, s[4:7] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012020 V_ADD_I32_e32 v34, 0x90, v19 ; 4A4426FF 00000090 BUFFER_LOAD_DWORD v34, s[4:7] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012222 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v35, v15, v34 ; 1046450F V_MAD_F32 v35, v16, v32, v35, 0, 0 ; D2820023 048E4110 V_ADD_I32_e32 v36, 0x98, v19 ; 4A4826FF 00000098 BUFFER_LOAD_DWORD v36, s[4:7] + v36 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80012424 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v17, v36, v35, 0, 0 ; D282000F 048E4911 V_MAD_F32 v5, v13, v15, v5, 0, 0 ; D2820005 04161F0D V_MUL_F32_e32 v15, v24, v21 ; 101E2B18 V_MAD_F32 v15, v23, v22, v15, 0, 0 ; D282000F 043E2D17 V_MAD_F32 v15, v25, v29, v15, 0, 0 ; D282000F 043E3B19 V_ADD_I32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v0, v13, v15, v0, 0, 0 ; D2820000 04021F0D V_MUL_F32_e32 v15, v24, v30 ; 101E3D18 V_MAD_F32 v15, v23, v31, v15, 0, 0 ; D282000F 043E3F17 V_MAD_F32 v15, v25, v33, v15, 0, 0 ; D282000F 043E4319 V_ADD_I32_e32 v16, 0x9c, v28 ; 4A2038FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v2, v13, v15, v2, 0, 0 ; D2820002 040A1F0D V_MUL_F32_e32 v15, v24, v32 ; 101E4118 V_MAD_F32 v15, v23, v34, v15, 0, 0 ; D282000F 043E4517 V_MAD_F32 v15, v25, v36, v15, 0, 0 ; D282000F 043E4919 V_ADD_I32_e32 v16, 0x9c, v19 ; 4A2026FF 0000009C BUFFER_LOAD_DWORD v16, s[4:7] + v16 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80011010 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v15, v27, v16, v15, 0, 0 ; D282000F 043E211B V_MAD_F32 v3, v13, v15, v3, 0, 0 ; D2820003 040E1F0D S_OR_B64 exec, exec, s[2:3] ; 88FE027E S_OR_B64 exec, exec, s[0:1] ; 88FE007E S_BUFFER_LOAD_DWORD s2, s[4:7], 0x23 ; C2010523 S_BUFFER_LOAD_DWORD s3, s[4:7], 0x22 ; C2018522 S_BUFFER_LOAD_DWORD s8, s[4:7], 0x21 ; C2040521 S_BUFFER_LOAD_DWORD s9, s[4:7], 0x20 ; C2048520 S_BUFFER_LOAD_DWORD s10, s[4:7], 0x1d ; C205051D S_BUFFER_LOAD_DWORD s11, s[4:7], 0x1c ; C205851C S_BUFFER_LOAD_DWORD s12, s[4:7], 0x19 ; C2060519 S_BUFFER_LOAD_DWORD s13, s[4:7], 0x18 ; C2068518 S_BUFFER_LOAD_DWORD s14, s[4:7], 0x15 ; C2070515 S_BUFFER_LOAD_DWORD s15, s[4:7], 0x14 ; C2078514 S_BUFFER_LOAD_DWORD s16, s[4:7], 0x11 ; C2080511 S_BUFFER_LOAD_DWORD s17, s[4:7], 0x10 ; C2088510 S_BUFFER_LOAD_DWORD s18, s[4:7], 0xf ; C209050F S_BUFFER_LOAD_DWORD s19, s[4:7], 0xe ; C209850E S_BUFFER_LOAD_DWORD s20, s[4:7], 0xd ; C20A050D S_BUFFER_LOAD_DWORD s21, s[4:7], 0xc ; C20A850C S_BUFFER_LOAD_DWORD s22, s[4:7], 0xb ; C20B050B S_BUFFER_LOAD_DWORD s23, s[4:7], 0xa ; C20B850A S_BUFFER_LOAD_DWORD s24, s[4:7], 0x9 ; C20C0509 S_BUFFER_LOAD_DWORD s25, s[4:7], 0x8 ; C20C8508 S_BUFFER_LOAD_DWORD s26, s[4:7], 0x7 ; C20D0507 S_BUFFER_LOAD_DWORD s27, s[4:7], 0x6 ; C20D8506 S_BUFFER_LOAD_DWORD s28, s[4:7], 0x5 ; C20E0505 S_BUFFER_LOAD_DWORD s29, s[4:7], 0x4 ; C20E8504 S_BUFFER_LOAD_DWORD s30, s[4:7], 0x3 ; C20F0503 S_BUFFER_LOAD_DWORD s31, s[4:7], 0x2 ; C20F8502 S_BUFFER_LOAD_DWORD s32, s[4:7], 0x1 ; C2100501 S_BUFFER_LOAD_DWORD s33, s[4:7], 0x0 ; C2108500 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v11, s2 ; 7E160202 V_MOV_B32_e32 v12, s3 ; 7E180203 V_MOV_B32_e32 v13, s8 ; 7E1A0208 V_MOV_B32_e32 v14, s9 ; 7E1C0209 V_MOV_B32_e32 v15, s10 ; 7E1E020A V_MOV_B32_e32 v16, s11 ; 7E20020B V_MOV_B32_e32 v17, s12 ; 7E22020C V_MOV_B32_e32 v18, s13 ; 7E24020D V_MOV_B32_e32 v19, s14 ; 7E26020E V_MOV_B32_e32 v20, s15 ; 7E28020F V_MOV_B32_e32 v21, s16 ; 7E2A0210 V_MOV_B32_e32 v22, s17 ; 7E2C0211 V_MOV_B32_e32 v23, s18 ; 7E2E0212 V_MOV_B32_e32 v24, s19 ; 7E300213 V_MOV_B32_e32 v25, s20 ; 7E320214 V_MOV_B32_e32 v26, s21 ; 7E340215 V_MOV_B32_e32 v27, s22 ; 7E360216 V_MOV_B32_e32 v28, s23 ; 7E380217 V_MOV_B32_e32 v29, s24 ; 7E3A0218 V_MOV_B32_e32 v30, s25 ; 7E3C0219 V_MOV_B32_e32 v31, s26 ; 7E3E021A V_MOV_B32_e32 v32, s27 ; 7E40021B V_MOV_B32_e32 v33, s28 ; 7E42021C V_MOV_B32_e32 v34, s29 ; 7E44021D V_MOV_B32_e32 v35, s30 ; 7E46021E V_MOV_B32_e32 v36, s31 ; 7E48021F V_MOV_B32_e32 v37, s32 ; 7E4A0220 V_MOV_B32_e32 v38, s33 ; 7E4C0221 V_MUL_F32_e32 v39, v1, v1 ; 104E0301 V_MAD_F32 v39, v5, v5, v39, 0, 0 ; D2820027 049E0B05 V_MAD_F32 v39, v4, v4, v39, 0, 0 ; D2820027 049E0904 V_MOV_B32_e32 v40, 1.000000e-07 ; 7E5002FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v39, v40 ; 7C0C5127 V_CMP_U_F32_e64 s[0:1], v39, v39, 0, 0 ; D0100000 00024F27 V_CNDMASK_B32_e64 v40, 0, -1, vcc, 0, 0, 0, 0 ; D2000028 01A98280 V_CNDMASK_B32_e64 v41, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000029 00018280 V_OR_B32_e32 v40, v40, v41 ; 38505328 V_MOV_B32_e32 v41, 0x33d6bf95 ; 7E5202FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v40, 0, 0, 0 ; D10A0000 00010128 V_CNDMASK_B32_e64 v39, v41, v39, s[0:1], 0, 0, 0, 0 ; D2000027 00024F29 V_RSQ_CLAMP_F32_e32 v39, v39 ; 7E4E5927 V_MUL_F32_e32 v4, v39, v4 ; 10080927 V_MUL_F32_e32 v5, v39, v5 ; 100A0B27 V_MUL_F32_e32 v1, v39, v1 ; 10020327 EXP 15, 32, 0, 0, 0, v1, v5, v4, v10 ; F800020F 0A040501 S_WAITCNT expcnt(0) ; BF8C070F V_ADD_F32_e32 v1, v3, v11 ; 06021703 V_MUL_F32_e32 v1, v1, v13 ; 10021B01 V_ADD_F32_e32 v4, v0, v12 ; 06081900 V_MUL_F32_e32 v4, v4, v14 ; 10081D04 EXP 15, 33, 0, 0, 0, v6, v7, v4, v1 ; F800021F 01040706 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v1, 1.000000e+00 ; 7E0202F2 EXP 15, 34, 0, 0, 0, v0, v3, v2, v1 ; F800022F 01020300 V_MUL_F32_e32 v4, v3, v33 ; 10084303 V_MAD_F32 v4, v0, v37, v4, 0, 0 ; D2820004 04124B00 V_MAD_F32 v4, v2, v29, v4, 0, 0 ; D2820004 04123B02 V_ADD_F32_e32 v4, v4, v25 ; 06083304 V_MUL_F32_e32 v5, v3, v31 ; 100A3F03 V_MAD_F32 v5, v0, v35, v5, 0, 0 ; D2820005 04164700 V_MAD_F32 v5, v2, v27, v5, 0, 0 ; D2820005 04163702 V_ADD_F32_e32 v5, v5, v23 ; 060A2F05 V_RCP_F32_e32 v6, v5 ; 7E0C5505 V_MUL_F32_e32 v7, v4, v6 ; 100E0D04 V_MUL_F32_e32 v8, v3, v34 ; 10104503 V_MAD_F32 v8, v0, v38, v8, 0, 0 ; D2820008 04224D00 V_MAD_F32 v8, v2, v30, v8, 0, 0 ; D2820008 04223D02 V_ADD_F32_e32 v8, v8, v26 ; 06103508 V_MUL_F32_e32 v6, v8, v6 ; 100C0D08 V_MOV_B32_e32 v9, 0.000000e+00 ; 7E120280 EXP 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v6, v3, v19 ; 100C2703 V_MAD_F32 v6, v0, v21, v6, 0, 0 ; D2820006 041A2B00 V_MAD_F32 v6, v2, v17, v6, 0, 0 ; D2820006 041A2302 V_ADD_F32_e32 v6, v6, v15 ; 060C1F06 V_MAD_F32 v6, v6, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820006 03C5E306 V_MUL_F32_e32 v7, v3, v20 ; 100E2903 V_MAD_F32 v7, v0, v22, v7, 0, 0 ; D2820007 041E2D00 V_MAD_F32 v7, v2, v18, v7, 0, 0 ; D2820007 041E2502 V_ADD_F32_e32 v7, v7, v16 ; 060E2107 V_MAD_F32 v7, v7, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820007 03C5E107 EXP 15, 36, 0, 0, 0, v7, v6, v9, v1 ; F800024F 01090607 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v1, v3, v32 ; 10024103 V_MAD_F32 v0, v0, v36, v1, 0, 0 ; D2820000 04064900 V_MAD_F32 v0, v2, v28, v0, 0, 0 ; D2820000 04023902 V_ADD_F32_e32 v0, v0, v24 ; 06003100 EXP 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xy, TEMP[5].yzzz 47: TEX TEMP[6], TEMP[6], SAMP[4], 2D 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %159 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %161 = fmul float %102, %158 %162 = fmul float %103, %159 %163 = fmul float %104, %160 %164 = fadd float %161, %161 %165 = fadd float %162, %162 %166 = fadd float %163, %163 %167 = fsub float -0.000000e+00, %85 %168 = fadd float %24, %167 %169 = fsub float -0.000000e+00, %86 %170 = fadd float %25, %169 %171 = fsub float -0.000000e+00, %87 %172 = fadd float %26, %171 %173 = fmul float %168, %168 %174 = fmul float %170, %170 %175 = fadd float %174, %173 %176 = fmul float %172, %172 %177 = fadd float %175, %176 %178 = fcmp uge float %177, 0x3E7AD7F2A0000000 %179 = select i1 %178, float %177, float 0x3E7AD7F2A0000000 %180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179) %181 = fmul float %180, %168 %182 = fmul float %180, %170 %183 = fmul float %180, %172 %184 = fmul float %168, %180 %185 = fadd float %184, 0x3FAB15B580000000 %186 = fmul float %170, %180 %187 = fadd float %186, 0x3FEB126EA0000000 %188 = fmul float %172, %180 %189 = fadd float %188, 0x3FE0ED9160000000 %190 = fmul float %185, %185 %191 = fmul float %187, %187 %192 = fadd float %190, %191 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = fmul float 0.000000e+00, 0.000000e+00 %196 = fadd float %194, %195 %197 = call float @llvm.AMDGPU.rsq.clamped.f32(float %196) %198 = fmul float %185, %197 %199 = fmul float %187, %197 %200 = fmul float %189, %197 %201 = fmul float %198, %78 %202 = fmul float %199, %79 %203 = fadd float %202, %201 %204 = fmul float %200, %80 %205 = fadd float %203, %204 %206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %207 = fmul float %78, %181 %208 = fmul float %79, %182 %209 = fadd float %208, %207 %210 = fmul float %80, %183 %211 = fadd float %209, %210 %212 = fadd float %211, %211 %213 = fsub float -0.000000e+00, %181 %214 = fmul float %212, %78 %215 = fadd float %214, %213 %216 = fsub float -0.000000e+00, %182 %217 = fmul float %212, %79 %218 = fadd float %217, %216 %219 = fsub float -0.000000e+00, %183 %220 = fmul float %212, %80 %221 = fadd float %220, %219 %222 = bitcast float %215 to i32 %223 = bitcast float %218 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %55 to <32 x i8> %227 = bitcast <4 x i32> %57 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = call float @llvm.pow.f32(float %229, float 0x40019999A0000000) %233 = call float @llvm.pow.f32(float %230, float 0x40019999A0000000) %234 = call float @llvm.pow.f32(float %231, float 0x40019999A0000000) %235 = bitcast float %81 to i32 %236 = bitcast float %82 to i32 %237 = insertelement <2 x i32> undef, i32 %235, i32 0 %238 = insertelement <2 x i32> %237, i32 %236, i32 1 %239 = bitcast <8 x i32> %43 to <32 x i8> %240 = bitcast <4 x i32> %45 to <16 x i8> %241 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %238, <32 x i8> %239, <16 x i8> %240, i32 2) %242 = extractelement <4 x float> %241, i32 0 %243 = extractelement <4 x float> %241, i32 1 %244 = extractelement <4 x float> %241, i32 2 %245 = extractelement <4 x float> %241, i32 3 %246 = call float @llvm.pow.f32(float %242, float 0x40019999A0000000) %247 = call float @llvm.pow.f32(float %243, float 0x40019999A0000000) %248 = call float @llvm.pow.f32(float %244, float 0x40019999A0000000) %249 = call float @llvm.pow.f32(float %245, float 1.000000e+00) %250 = call float @fabs(float %249) %251 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %252 = insertelement <4 x float> undef, float %215, i32 0 %253 = insertelement <4 x float> %252, float %218, i32 1 %254 = insertelement <4 x float> %253, float %221, i32 2 %255 = insertelement <4 x float> %254, float %221, i32 3 %256 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %255) %257 = extractelement <4 x float> %256, i32 0 %258 = extractelement <4 x float> %256, i32 1 %259 = extractelement <4 x float> %256, i32 2 %260 = extractelement <4 x float> %256, i32 3 %261 = call float @fabs(float %259) %262 = fdiv float 1.000000e+00, %261 %263 = fmul float %257, %262 %264 = fadd float %263, 1.500000e+00 %265 = fmul float %258, %262 %266 = fadd float %265, 1.500000e+00 %267 = bitcast float %266 to i32 %268 = bitcast float %264 to i32 %269 = bitcast float %260 to i32 %270 = insertelement <4 x i32> undef, i32 %267, i32 0 %271 = insertelement <4 x i32> %270, i32 %268, i32 1 %272 = insertelement <4 x i32> %271, i32 %269, i32 2 %273 = insertelement <4 x i32> %272, i32 undef, i32 3 %274 = bitcast <8 x i32> %51 to <32 x i8> %275 = bitcast <4 x i32> %53 to <16 x i8> %276 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %273, <32 x i8> %274, <16 x i8> %275, i32 4) %277 = extractelement <4 x float> %276, i32 0 %278 = extractelement <4 x float> %276, i32 1 %279 = extractelement <4 x float> %276, i32 2 %280 = call float @llvm.pow.f32(float %277, float 0x40019999A0000000) %281 = call float @llvm.pow.f32(float %278, float 0x40019999A0000000) %282 = call float @llvm.pow.f32(float %279, float 0x40019999A0000000) %283 = call float @llvm.AMDGPU.lrp(float %251, float %280, float %232) %284 = call float @llvm.AMDGPU.lrp(float %251, float %281, float %233) %285 = call float @llvm.AMDGPU.lrp(float %251, float %282, float %234) %286 = fmul float %251, 1.990000e+02 %287 = fadd float %286, 1.000000e+00 %288 = call float @fabs(float %206) %289 = call float @llvm.pow.f32(float %288, float %287) %290 = fmul float %287, 0x3FB99999A0000000 %291 = fmul float %290, %289 %292 = fadd float %291, %283 %293 = fmul float %290, %289 %294 = fadd float %293, %284 %295 = fmul float %290, %289 %296 = fadd float %295, %285 %297 = fmul float %292, %246 %298 = fadd float %297, %164 %299 = fmul float %294, %247 %300 = fadd float %299, %165 %301 = fmul float %296, %248 %302 = fadd float %301, %166 %303 = call float @fabs(float %298) %304 = call float @llvm.log2.f32(float %303) %305 = call float @fabs(float %300) %306 = call float @llvm.log2.f32(float %305) %307 = call float @fabs(float %302) %308 = call float @llvm.log2.f32(float %307) %309 = fmul float %304, 0x3FDD1743E0000000 %310 = fmul float %306, 0x3FDD1743E0000000 %311 = fmul float %308, 0x3FDD1743E0000000 %312 = call float @llvm.AMDIL.exp.(float %309) %313 = call float @llvm.AMDIL.exp.(float %310) %314 = call float @llvm.AMDIL.exp.(float %311) %315 = bitcast float %312 to i32 %316 = bitcast float %313 to i32 %317 = bitcast float %314 to i32 %318 = insertelement <4 x i32> undef, i32 %315, i32 0 %319 = insertelement <4 x i32> %318, i32 %316, i32 1 %320 = insertelement <4 x i32> %319, i32 %317, i32 2 %321 = insertelement <4 x i32> %320, i32 undef, i32 3 %322 = bitcast <8 x i32> %63 to <32 x i8> %323 = bitcast <4 x i32> %65 to <16 x i8> %324 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %321, <32 x i8> %322, <16 x i8> %323, i32 3) %325 = extractelement <4 x float> %324, i32 0 %326 = extractelement <4 x float> %324, i32 1 %327 = extractelement <4 x float> %324, i32 2 %328 = fmul float %89, 1.000000e+00 %329 = fadd float %328, 0.000000e+00 %330 = fmul float %90, -1.000000e+00 %331 = fadd float %330, 1.000000e+00 %332 = bitcast float %329 to i32 %333 = bitcast float %331 to i32 %334 = insertelement <2 x i32> undef, i32 %332, i32 0 %335 = insertelement <2 x i32> %334, i32 %333, i32 1 %336 = bitcast <8 x i32> %67 to <32 x i8> %337 = bitcast <4 x i32> %69 to <16 x i8> %338 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2) %339 = extractelement <4 x float> %338, i32 0 %340 = extractelement <4 x float> %338, i32 2 %341 = bitcast float %83 to i32 %342 = bitcast float %84 to i32 %343 = insertelement <2 x i32> undef, i32 %341, i32 0 %344 = insertelement <2 x i32> %343, i32 %342, i32 1 %345 = bitcast <8 x i32> %71 to <32 x i8> %346 = bitcast <4 x i32> %73 to <16 x i8> %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %344, <32 x i8> %345, <16 x i8> %346, i32 2) %348 = extractelement <4 x float> %347, i32 0 %349 = extractelement <4 x float> %347, i32 1 %350 = extractelement <4 x float> %347, i32 2 %351 = call float @llvm.AMDGPU.lrp(float %339, float %325, float %348) %352 = call float @llvm.AMDGPU.lrp(float %339, float %326, float %349) %353 = call float @llvm.AMDGPU.lrp(float %339, float %327, float %350) %354 = fsub float -0.000000e+00, %339 %355 = fmul float %340, %354 %356 = fadd float %355, %339 %357 = bitcast float %351 to i32 %358 = bitcast float %352 to i32 %359 = bitcast float %353 to i32 %360 = insertelement <4 x i32> undef, i32 %357, i32 0 %361 = insertelement <4 x i32> %360, i32 %358, i32 1 %362 = insertelement <4 x i32> %361, i32 %359, i32 2 %363 = insertelement <4 x i32> %362, i32 undef, i32 3 %364 = bitcast <8 x i32> %75 to <32 x i8> %365 = bitcast <4 x i32> %77 to <16 x i8> %366 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %363, <32 x i8> %364, <16 x i8> %365, i32 3) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 1 %369 = extractelement <4 x float> %366, i32 2 %370 = call float @llvm.AMDGPU.lrp(float %356, float %367, float %351) %371 = call float @llvm.AMDGPU.lrp(float %356, float %368, float %352) %372 = call float @llvm.AMDGPU.lrp(float %356, float %369, float %353) %373 = fsub float -0.000000e+00, %370 %374 = fadd float %373, %30 %375 = fsub float -0.000000e+00, %371 %376 = fadd float %375, %31 %377 = fsub float -0.000000e+00, %372 %378 = fadd float %377, %32 %379 = fmul float %37, %87 %380 = fsub float -0.000000e+00, %33 %381 = fadd float %85, %380 %382 = fsub float -0.000000e+00, %34 %383 = fadd float %86, %382 %384 = fsub float -0.000000e+00, %35 %385 = fadd float %379, %384 %386 = fmul float %385, %27 %387 = fmul float %386, 0x3FF7154CA0000000 %388 = call float @llvm.AMDIL.exp.(float %387) %389 = fsub float -0.000000e+00, %388 %390 = fadd float %389, 1.000000e+00 %391 = fmul float %381, %381 %392 = fmul float %383, %383 %393 = fadd float %392, %391 %394 = fmul float %385, %385 %395 = fadd float %393, %394 %396 = fdiv float 1.000000e+00, %385 %397 = fmul float %395, %28 %398 = fmul float %390, %397 %399 = fmul float %396, %398 %400 = fmul float %399, 0x3FF7154CA0000000 %401 = call float @llvm.AMDIL.exp.(float %400) %402 = call float @llvm.AMDIL.clamp.(float %401, float 0.000000e+00, float 1.000000e+00) %403 = fsub float -0.000000e+00, %402 %404 = fadd float %403, 1.000000e+00 %405 = fmul float %88, 5.000000e-01 %406 = fadd float %405, 5.000000e-01 %407 = bitcast float %406 to i32 %408 = bitcast float %29 to i32 %409 = insertelement <2 x i32> undef, i32 %407, i32 0 %410 = insertelement <2 x i32> %409, i32 %408, i32 1 %411 = bitcast <8 x i32> %59 to <32 x i8> %412 = bitcast <4 x i32> %61 to <16 x i8> %413 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2) %414 = extractelement <4 x float> %413, i32 0 %415 = fmul float %404, %414 %416 = fmul float %415, %374 %417 = fadd float %416, %370 %418 = fmul float %415, %376 %419 = fadd float %418, %371 %420 = fmul float %415, %378 %421 = fadd float %420, %372 %422 = fmul float %107, 5.000000e-01 %423 = fsub float -0.000000e+00, %36 %424 = fadd float %423, %87 %425 = fcmp oge float %424, 0.000000e+00 %426 = sext i1 %425 to i32 %427 = bitcast i32 %426 to float %428 = bitcast float %427 to i32 %429 = icmp ne i32 %428, 0 %. = select i1 %429, float %107, float %422 %430 = call i32 @llvm.SI.packf16(float %417, float %419) %431 = bitcast i32 %430 to float %432 = call i32 @llvm.SI.packf16(float %421, float %.) %433 = bitcast i32 %432 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %431, float %433, float %431, float %433) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v26, v0, 1, 1, [m0] ; C8680500 V_INTERP_P2_F32 v26, [v26], v1, 1, 1, [m0] ; C8690501 V_INTERP_P1_F32 v25, v0, 0, 1, [m0] ; C8640400 V_INTERP_P2_F32 v25, [v25], v1, 0, 1, [m0] ; C8650401 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800F00 00640219 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v6, v4 ; 7E0C4F04 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v17, v6 ; 7E224B06 V_LOG_F32_e32 v6, v3 ; 7E0C4F03 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v11, v6 ; 7E164B06 V_LOG_F32_e32 v6, v2 ; 7E0C4F02 V_MUL_LEGACY_F32_e32 v6, 2.200000e+00, v6 ; 0E0C0CFF 400CCCCD V_EXP_F32_e32 v10, v6 ; 7E144B06 V_LOG_F32_e32 v2, v5 ; 7E044F05 V_MUL_LEGACY_F32_e32 v2, 1.000000e+00, v2 ; 0E0404F2 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_MOV_B32_e32 v3, 0x7fffffff ; 7E0602FF 7FFFFFFF V_AND_B32_e32 v2, v2, v3 ; 36040702 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v2, v2 ; 7E044B02 V_ADD_F32_e32 v3, -1.000000e-01, v2 ; 060604FF BDCCCCCD V_CMP_LT_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D0020000 00010103 V_CNDMASK_B32_e64 v3, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000003 00018280 V_OR_B32_e32 v3, v3, v3 ; 38060703 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_INTERP_P1_F32 v6, v0, 1, 4, [m0] ; C8181100 V_INTERP_P2_F32 v6, [v6], v1, 1, 4, [m0] ; C8191101 V_INTERP_P1_F32 v8, v0, 0, 4, [m0] ; C8201000 V_INTERP_P2_F32 v8, [v8], v1, 0, 4, [m0] ; C8211001 V_INTERP_P1_F32 v4, v0, 1, 3, [m0] ; C8100D00 V_INTERP_P2_F32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 V_INTERP_P1_F32 v3, v0, 2, 2, [m0] ; C80C0A00 V_INTERP_P2_F32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 V_INTERP_P1_F32 v5, v0, 1, 2, [m0] ; C8140900 V_INTERP_P2_F32 v5, [v5], v1, 1, 2, [m0] ; C8150901 V_INTERP_P1_F32 v7, v0, 0, 2, [m0] ; C81C0800 V_INTERP_P2_F32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 V_INTERP_P1_F32 v16, v0, 3, 1, [m0] ; C8400700 V_INTERP_P2_F32 v16, [v16], v1, 3, 1, [m0] ; C8410701 V_INTERP_P1_F32 v15, v0, 2, 1, [m0] ; C83C0600 V_INTERP_P2_F32 v15, [v15], v1, 2, 1, [m0] ; C83D0601 V_INTERP_P1_F32 v29, v0, 2, 0, [m0] ; C8740200 V_INTERP_P2_F32 v29, [v29], v1, 2, 0, [m0] ; C8750201 V_INTERP_P1_F32 v28, v0, 1, 0, [m0] ; C8700100 V_INTERP_P2_F32 v28, [v28], v1, 1, 0, [m0] ; C8710101 V_INTERP_P1_F32 v27, v0, 0, 0, [m0] ; C86C0000 V_INTERP_P2_F32 v27, [v27], v1, 0, 0, [m0] ; C86D0001 S_LOAD_DWORDX4 s[8:11], s[2:3], 0x0 ; C0840300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s2, s[8:11], 0x20 ; C2010920 S_BUFFER_LOAD_DWORD s3, s[8:11], 0x1c ; C201891C S_BUFFER_LOAD_DWORD s12, s[8:11], 0x1a ; C206091A S_BUFFER_LOAD_DWORD s13, s[8:11], 0x19 ; C2068919 S_BUFFER_LOAD_DWORD s14, s[8:11], 0x18 ; C2070918 S_BUFFER_LOAD_DWORD s15, s[8:11], 0x16 ; C2078916 S_BUFFER_LOAD_DWORD s16, s[8:11], 0x15 ; C2080915 S_BUFFER_LOAD_DWORD s17, s[8:11], 0x14 ; C2088914 S_BUFFER_LOAD_DWORD s18, s[8:11], 0x13 ; C2090913 S_BUFFER_LOAD_DWORD s19, s[8:11], 0x11 ; C2098911 S_BUFFER_LOAD_DWORD s20, s[8:11], 0x10 ; C20A0910 S_BUFFER_LOAD_DWORD s21, s[8:11], 0xe ; C20A890E S_BUFFER_LOAD_DWORD s22, s[8:11], 0xd ; C20B090D S_BUFFER_LOAD_DWORD s8, s[8:11], 0xc ; C204090C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v18, s2 ; 7E240202 V_MOV_B32_e32 v0, s3 ; 7E000203 V_MOV_B32_e32 v19, s12 ; 7E26020C V_MOV_B32_e32 v23, s13 ; 7E2E020D V_MOV_B32_e32 v22, s14 ; 7E2C020E V_MOV_B32_e32 v1, s15 ; 7E02020F V_MOV_B32_e32 v24, s16 ; 7E300210 V_MOV_B32_e32 v12, s17 ; 7E180211 V_MOV_B32_e32 v14, s18 ; 7E1C0212 V_MOV_B32_e32 v21, s19 ; 7E2A0213 V_MOV_B32_e32 v20, s20 ; 7E280214 V_MOV_B32_e32 v31, s21 ; 7E3E0215 V_MOV_B32_e32 v33, s22 ; 7E420216 V_MOV_B32_e32 v32, s8 ; 7E400208 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E V_SUB_F32_e32 v33, v33, v5 ; 08420B21 V_SUB_F32_e32 v32, v32, v7 ; 08400F20 V_MUL_F32_e32 v34, v32, v32 ; 10444120 V_MAD_F32 v34, v33, v33, v34, 0, 0 ; D2820022 048A4321 V_SUB_F32_e32 v31, v31, v3 ; 083E071F V_MAD_F32 v34, v31, v31, v34, 0, 0 ; D2820022 048A3F1F V_MOV_B32_e32 v35, 1.000000e-07 ; 7E4602FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v34, v35 ; 7C0C4722 V_CMP_U_F32_e64 s[0:1], v34, v34, 0, 0 ; D0100000 00024522 V_CNDMASK_B32_e64 v35, 0, -1, vcc, 0, 0, 0, 0 ; D2000023 01A98280 V_CNDMASK_B32_e64 v36, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000024 00018280 V_OR_B32_e32 v35, v35, v36 ; 38464923 V_MOV_B32_e32 v36, 0x33d6bf95 ; 7E4802FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v35, 0, 0, 0 ; D10A0000 00010123 V_CNDMASK_B32_e64 v34, v36, v34, s[0:1], 0, 0, 0, 0 ; D2000022 00024524 V_RSQ_CLAMP_F32_e32 v34, v34 ; 7E445922 V_MUL_F32_e32 v35, v33, v34 ; 10464521 V_MUL_F32_e32 v36, v32, v34 ; 10484520 V_MUL_F32_e32 v37, v27, v36 ; 104A491B V_MAD_F32 v37, v28, v35, v37, 0, 0 ; D2820025 0496471C V_MUL_F32_e32 v38, v31, v34 ; 104C451F V_MAD_F32 v37, v29, v38, v37, 0, 0 ; D2820025 04964D1D V_ADD_F32_e32 v37, v37, v37 ; 064A4B25 V_MUL_F32_e32 v39, v37, v29 ; 104E3B25 V_SUB_F32_e32 v40, v39, v38 ; 08504D27 V_MUL_F32_e32 v42, v37, v28 ; 10543925 V_SUB_F32_e32 v39, v42, v35 ; 084E472A V_MUL_F32_e32 v35, v37, v27 ; 10463725 V_SUB_F32_e32 v38, v35, v36 ; 084C4923 V_MOV_B32_e32 v41, v40 ; 7E520328 V_CUBESC_F32 v43, v38, v39, v40, 0, 0 ; D28A002B 04A24F26 V_CUBETC_F32 v42, v38, v39, v40, 0, 0 ; D28C002A 04A24F26 V_CUBEMA_F32 v44, v38, v39, v40, 0, 0 ; D28E002C 04A24F26 V_CUBEID_F32 v45, v38, v39, v40, 0, 0 ; D288002D 04A24F26 V_MOV_B32_e32 v35, 0x7fffffff ; 7E4602FF 7FFFFFFF V_AND_B32_e32 v35, v44, v35 ; 3646472C V_RCP_F32_e32 v35, v35 ; 7E465523 V_MOV_B32_e32 v36, 1.500000e+00 ; 7E4802FF 3FC00000 V_MAD_F32 v44, v42, v35, v36, 0, 0 ; D282002C 0492472A V_MAD_F32 v43, v43, v35, v36, 0, 0 ; D282002B 0492472B S_LOAD_DWORDX4 s[0:3], s[4:5], 0xc ; C080050C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x18 ; C0C40718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[8:15], s[0:3] ; F0800700 00022A2B S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v35, v44 ; 7E464F2C V_MUL_LEGACY_F32_e32 v35, 2.200000e+00, v35 ; 0E4646FF 400CCCCD V_EXP_F32_e32 v35, v35 ; 7E464B23 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x10 ; C0800510 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x20 ; C0C40720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[8:15], s[0:3] ; F0800700 00022526 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v40, v39 ; 7E504F27 V_MUL_LEGACY_F32_e32 v40, 2.200000e+00, v40 ; 0E5050FF 400CCCCD V_EXP_F32_e32 v40, v40 ; 7E504B28 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x8 ; C0C40708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[8:15], s[0:3] ; F0800F00 00022D19 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v25, v48 ; 7E324F30 V_MUL_LEGACY_F32_e32 v25, 1.000000e+00, v25 ; 0E3232F2 V_EXP_F32_e32 v25, v25 ; 7E324B19 V_MOV_B32_e32 v26, 0x7fffffff ; 7E3402FF 7FFFFFFF V_AND_B32_e32 v25, v25, v26 ; 36323519 V_LOG_F32_e32 v25, v25 ; 7E324F19 V_MUL_LEGACY_F32_e32 v25, 2.200000e+00, v25 ; 0E3232FF 400CCCCD V_EXP_F32_e32 v25, v25 ; 7E324B19 V_SUB_F32_e32 v26, 1.000000e+00, v25 ; 083432F2 V_MUL_F32_e32 v40, v26, v40 ; 1050511A V_MAD_F32 v35, v25, v35, v40, 0, 0 ; D2820023 04A24719 V_MOV_B32_e32 v40, 5.290000e-02 ; 7E5002FF 3D58ADAC V_MAD_F32 v32, v32, v34, v40, 0, 0 ; D2820020 04A24520 V_MOV_B32_e32 v40, 8.460000e-01 ; 7E5002FF 3F589375 V_MAD_F32 v33, v33, v34, v40, 0, 0 ; D2820021 04A24521 V_MUL_F32_e32 v40, v33, v33 ; 10504321 V_MAD_F32 v40, v32, v32, v40, 0, 0 ; D2820028 04A24120 V_MOV_B32_e32 v41, 5.290000e-01 ; 7E5202FF 3F076C8B V_MAD_F32 v31, v31, v34, v41, 0, 0 ; D282001F 04A6451F V_MAD_F32 v34, v31, v31, v40, 0, 0 ; D2820022 04A23F1F V_RSQ_CLAMP_F32_e32 v34, v34 ; 7E445922 V_MUL_F32_e32 v33, v33, v34 ; 10424521 V_MUL_F32_e32 v32, v32, v34 ; 10404520 V_MUL_F32_e32 v32, v32, v27 ; 10403720 V_MAD_F32 v32, v33, v28, v32, 0, 0 ; D2820020 04823921 V_MUL_F32_e32 v31, v31, v34 ; 103E451F V_MAD_F32 v31, v31, v29, v32, 0, 0 ; D282001F 04823B1F V_ADD_F32_e64 v31, v31, 0, 1, 0 ; D206081F 0001011F V_MOV_B32_e32 v32, 0x7fffffff ; 7E4002FF 7FFFFFFF V_AND_B32_e32 v31, v31, v32 ; 363E411F V_LOG_F32_e32 v31, v31 ; 7E3E4F1F V_MOV_B32_e32 v32, 1.990000e+02 ; 7E4002FF 43470000 V_MAD_F32 v32, v25, v32, 1.000000e+00, 0, 0 ; D2820020 03CA4119 V_MUL_LEGACY_F32_e32 v31, v32, v31 ; 0E3E3F20 V_EXP_F32_e32 v31, v31 ; 7E3E4B1F V_MUL_F32_e32 v32, 1.000000e-01, v32 ; 104040FF 3DCCCCCD V_MAD_F32 v33, v32, v31, v35, 0, 0 ; D2820021 048E3F20 V_MOV_B32_e32 v30, 0.000000e+00 ; 7E3C0280 V_CUBESC_F32 v50, v27, v28, v29, 0, 0 ; D28A0032 0476391B V_CUBETC_F32 v49, v27, v28, v29, 0, 0 ; D28C0031 0476391B V_CUBEMA_F32 v51, v27, v28, v29, 0, 0 ; D28E0033 0476391B V_CUBEID_F32 v52, v27, v28, v29, 0, 0 ; D2880034 0476391B V_MOV_B32_e32 v27, 0x7fffffff ; 7E3602FF 7FFFFFFF V_AND_B32_e32 v27, v51, v27 ; 36363733 V_RCP_F32_e32 v27, v27 ; 7E36551B V_MAD_F32 v51, v49, v27, v36, 0, 0 ; D2820033 04923731 V_MAD_F32 v50, v50, v27, v36, 0, 0 ; D2820032 04923732 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x8 ; C0800508 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x10 ; C0C40710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[8:15], s[0:3] ; F0800700 00021B32 S_WAITCNT vmcnt(0) ; BF8C0770 V_LOG_F32_e32 v30, v29 ; 7E3C4F1D V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MUL_F32_e32 v34, v17, v30 ; 10443D11 V_MAD_F32 v17, v17, v30, v34, 0, 0 ; D2820011 048A3D11 V_LOG_F32_e32 v30, v47 ; 7E3C4F2F V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MAD_F32 v17, v33, v30, v17, 0, 0 ; D2820011 04463D21 V_MOV_B32_e32 v30, 0x7fffffff ; 7E3C02FF 7FFFFFFF V_AND_B32_e32 v17, v17, v30 ; 36223D11 V_LOG_F32_e32 v17, v17 ; 7E224F11 V_MUL_F32_e32 v17, 4.545450e-01, v17 ; 102222FF 3EE8BA1F V_EXP_F32_e32 v35, v17 ; 7E464B11 V_LOG_F32_e32 v17, v43 ; 7E224F2B V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_LOG_F32_e32 v30, v38 ; 7E3C4F26 V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MUL_F32_e32 v30, v26, v30 ; 103C3D1A V_MAD_F32 v17, v25, v17, v30, 0, 0 ; D2820011 047A2319 V_MAD_F32 v17, v32, v31, v17, 0, 0 ; D2820011 04463F20 V_LOG_F32_e32 v30, v28 ; 7E3C4F1C V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MUL_F32_e32 v40, v11, v30 ; 10503D0B V_MAD_F32 v11, v11, v30, v40, 0, 0 ; D282000B 04A23D0B V_LOG_F32_e32 v30, v46 ; 7E3C4F2E V_MUL_LEGACY_F32_e32 v30, 2.200000e+00, v30 ; 0E3C3CFF 400CCCCD V_EXP_F32_e32 v30, v30 ; 7E3C4B1E V_MAD_F32 v11, v17, v30, v11, 0, 0 ; D282000B 042E3D11 V_MOV_B32_e32 v17, 0x7fffffff ; 7E2202FF 7FFFFFFF V_AND_B32_e32 v11, v11, v17 ; 3616230B V_LOG_F32_e32 v11, v11 ; 7E164F0B V_MUL_F32_e32 v11, 4.545450e-01, v11 ; 101616FF 3EE8BA1F V_EXP_F32_e32 v34, v11 ; 7E444B0B V_LOG_F32_e32 v11, v42 ; 7E164F2A V_MUL_LEGACY_F32_e32 v11, 2.200000e+00, v11 ; 0E1616FF 400CCCCD V_EXP_F32_e32 v11, v11 ; 7E164B0B V_LOG_F32_e32 v17, v37 ; 7E224F25 V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v17, v26, v17 ; 1022231A V_MAD_F32 v11, v25, v11, v17, 0, 0 ; D282000B 04461719 V_MAD_F32 v11, v32, v31, v11, 0, 0 ; D282000B 042E3F20 V_LOG_F32_e32 v17, v27 ; 7E224F1B V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MUL_F32_e32 v25, v10, v17 ; 1032230A V_MAD_F32 v10, v10, v17, v25, 0, 0 ; D282000A 0466230A V_LOG_F32_e32 v17, v45 ; 7E224F2D V_MUL_LEGACY_F32_e32 v17, 2.200000e+00, v17 ; 0E2222FF 400CCCCD V_EXP_F32_e32 v17, v17 ; 7E224B11 V_MAD_F32 v10, v11, v17, v10, 0, 0 ; D282000A 042A230B V_MOV_B32_e32 v11, 0x7fffffff ; 7E1602FF 7FFFFFFF V_AND_B32_e32 v10, v10, v11 ; 3614170A V_LOG_F32_e32 v10, v10 ; 7E144F0A V_MUL_F32_e32 v10, 4.545450e-01, v10 ; 101414FF 3EE8BA1F V_EXP_F32_e32 v33, v10 ; 7E424B0A S_LOAD_DWORDX4 s[0:3], s[4:5], 0x18 ; C0800518 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x30 ; C0C40730 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[8:15], s[0:3] ; F0800700 00021921 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x20 ; C0800520 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x40 ; C0C40740 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[8:15], s[0:3] ; F0800700 00020F0F V_SUB_F32_e32 v9, 1.000000e+00, v6 ; 08120CF2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x1c ; C080051C S_LOAD_DWORDX8 s[8:15], s[6:7], 0x38 ; C0C40738 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[8:9], 5, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800500 00020808 S_WAITCNT vmcnt(0) ; BF8C0770 V_SUB_F32_e32 v6, 1.000000e+00, v8 ; 080C10F2 V_MUL_F32_e32 v10, v6, v17 ; 10142306 V_MAD_F32 v30, v8, v27, v10, 0, 0 ; D282001E 042A3708 V_MUL_F32_e32 v10, v6, v16 ; 10142106 V_MAD_F32 v29, v8, v26, v10, 0, 0 ; D282001D 042A3508 V_MUL_F32_e32 v6, v6, v15 ; 100C1F06 V_MAD_F32 v28, v8, v25, v6, 0, 0 ; D282001C 041A3308 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x24 ; C0800524 S_LOAD_DWORDX8 s[8:15], s[6:7], 0x48 ; C0C40748 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[0:3] ; F0800700 00020F1C V_MUL_F32_e32 v6, v9, v8 ; 100C1109 V_SUB_F32_e32 v6, v8, v6 ; 080C0D08 V_SUB_F32_e32 v8, 1.000000e+00, v6 ; 08100CF2 V_MUL_F32_e32 v9, v8, v29 ; 10123B08 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v9, v6, v16, v9, 0, 0 ; D2820009 04262106 V_SUB_F32_e32 v10, v24, v9 ; 08141318 V_SUB_F32_e32 v5, v5, v23 ; 080A2F05 V_SUB_F32_e32 v7, v7, v22 ; 080E2D07 V_MUL_F32_e32 v7, v7, v7 ; 100E0F07 V_MAD_F32 v5, v5, v5, v7, 0, 0 ; D2820005 041E0B05 V_MUL_F32_e32 v7, v18, v3 ; 100E0712 V_SUB_F32_e32 v7, v7, v19 ; 080E2707 V_MAD_F32 v5, v7, v7, v5, 0, 0 ; D2820005 04160F07 V_MUL_F32_e32 v5, v5, v21 ; 100A2B05 V_MUL_F32_e32 v11, v7, v20 ; 10162907 V_MUL_F32_e32 v11, 1.442700e+00, v11 ; 101616FF 3FB8AA65 V_EXP_F32_e32 v11, v11 ; 7E164B0B V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_MUL_F32_e32 v5, v11, v5 ; 100A0B0B V_RCP_F32_e32 v7, v7 ; 7E0E5507 V_MUL_F32_e32 v5, v7, v5 ; 100A0B07 V_MUL_F32_e32 v5, 1.442700e+00, v5 ; 100A0AFF 3FB8AA65 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_ADD_F32_e64 v5, v5, 0, 1, 0 ; D2060805 00010105 V_SUB_F32_e32 v5, 1.000000e+00, v5 ; 080A0AF2 V_MAD_F32 v13, v4, 5.000000e-01, 5.000000e-01, 0, 0 ; D282000D 03C1E104 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x28 ; C0C20728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v4, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800100 0001040D S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v5, v4 ; 10080905 V_MAD_F32 v5, v4, v10, v9, 0, 0 ; D2820005 04261504 V_MUL_F32_e32 v7, v8, v28 ; 100E3908 V_MAD_F32 v7, v6, v15, v7, 0, 0 ; D2820007 041E1F06 V_SUB_F32_e32 v9, v12, v7 ; 08120F0C V_MAD_F32 v7, v4, v9, v7, 0, 0 ; D2820007 041E1304 V_CVT_PKRTZ_F16_F32_e32 v5, v7, v5 ; 5E0A0B07 V_MUL_F32_e32 v7, v8, v30 ; 100E3D08 V_MAD_F32 v6, v6, v17, v7, 0, 0 ; D2820006 041E2306 V_SUB_F32_e32 v1, v1, v6 ; 08020D01 V_MAD_F32 v1, v4, v1, v6, 0, 0 ; D2820001 041A0304 V_SUB_F32_e32 v0, v3, v0 ; 08000103 V_CMP_GE_F32_e64 s[0:1], v0, 0.000000e+00, 0, 0 ; D00C0000 00010100 V_MUL_F32_e32 v0, 5.000000e-01, v2 ; 100004F0 V_CNDMASK_B32_e64 v0, v0, v2, s[0:1], 0, 0, 0, 0 ; D2000000 00020500 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 EXP 15, 0, 1, 1, 1, v5, v0, v5, v0 ; F8001C0F 00050005 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..103] DCL TEMP[0..5], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+8] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[5] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[4], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[6], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[7] 112: ADD TEMP[0].xyz, TEMP[2], -CONST[3] 113: MOV TEMP[0].xyz, TEMP[0].xyzx 114: MOV TEMP[3].w, IN[4].xxxx 115: MOV TEMP[0].w, IN[4].yyyy 116: MOV TEMP[2].xyz, TEMP[2].xyzx 117: MOV TEMP[4].xyz, TEMP[1].xywx 118: MOV TEMP[2].w, IMM[0].yyyy 119: MOV TEMP[4].w, IMM[0].yyyy 120: MOV OUT[1], TEMP[3] 121: MOV OUT[0], TEMP[1] 122: MOV OUT[2], TEMP[0] 123: MOV OUT[3], TEMP[2] 124: MOV OUT[4], TEMP[4] 125: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = fmul float 3.000000e+00, %50 %67 = fmul float %36, 1.000000e+00 %68 = fadd float %67, 0.000000e+00 %69 = fmul float %37, 1.000000e+00 %70 = fadd float %69, 0.000000e+00 %71 = fmul float %38, 1.000000e+00 %72 = fadd float %71, 0.000000e+00 %73 = fmul float %36, 0.000000e+00 %74 = fadd float %73, 1.000000e+00 %75 = fptosi float %66 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = shl i32 %77, 4 %79 = add i32 %78, 128 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = shl i32 %77, 4 %82 = add i32 %81, 132 %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %82) %84 = shl i32 %77, 4 %85 = add i32 %84, 136 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %77, 4 %88 = add i32 %87, 140 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %68, %80 %91 = fmul float %70, %83 %92 = fadd float %90, %91 %93 = fmul float %72, %86 %94 = fadd float %92, %93 %95 = fmul float %74, %89 %96 = fadd float %94, %95 %97 = fptosi float %66 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = add i32 1, %99 %101 = bitcast i32 %100 to float %102 = bitcast float %101 to i32 %103 = shl i32 %102, 4 %104 = add i32 %103, 128 %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %104) %106 = shl i32 %102, 4 %107 = add i32 %106, 132 %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %107) %109 = shl i32 %102, 4 %110 = add i32 %109, 136 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = shl i32 %102, 4 %113 = add i32 %112, 140 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = fmul float %68, %105 %116 = fmul float %70, %108 %117 = fadd float %115, %116 %118 = fmul float %72, %111 %119 = fadd float %117, %118 %120 = fmul float %74, %114 %121 = fadd float %119, %120 %122 = fptosi float %66 to i32 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = add i32 2, %124 %126 = bitcast i32 %125 to float %127 = bitcast float %126 to i32 %128 = shl i32 %127, 4 %129 = add i32 %128, 128 %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %129) %131 = shl i32 %127, 4 %132 = add i32 %131, 132 %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %132) %134 = shl i32 %127, 4 %135 = add i32 %134, 136 %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %135) %137 = shl i32 %127, 4 %138 = add i32 %137, 140 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = fmul float %68, %130 %141 = fmul float %70, %133 %142 = fadd float %140, %141 %143 = fmul float %72, %136 %144 = fadd float %142, %143 %145 = fmul float %74, %139 %146 = fadd float %144, %145 %147 = fmul float %96, %43 %148 = fmul float %121, %43 %149 = fmul float %146, %43 %150 = fptosi float %66 to i32 %151 = bitcast i32 %150 to float %152 = bitcast float %151 to i32 %153 = shl i32 %152, 4 %154 = add i32 %153, 128 %155 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %154) %156 = shl i32 %152, 4 %157 = add i32 %156, 132 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = shl i32 %152, 4 %160 = add i32 %159, 136 %161 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %160) %162 = fmul float %57, %155 %163 = fmul float %58, %158 %164 = fadd float %163, %162 %165 = fmul float %59, %161 %166 = fadd float %164, %165 %167 = fptosi float %66 to i32 %168 = bitcast i32 %167 to float %169 = bitcast float %168 to i32 %170 = add i32 1, %169 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = shl i32 %172, 4 %174 = add i32 %173, 128 %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %174) %176 = shl i32 %172, 4 %177 = add i32 %176, 132 %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %177) %179 = shl i32 %172, 4 %180 = add i32 %179, 136 %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %180) %182 = fmul float %57, %175 %183 = fmul float %58, %178 %184 = fadd float %183, %182 %185 = fmul float %59, %181 %186 = fadd float %184, %185 %187 = fptosi float %66 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 2, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 128 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = shl i32 %192, 4 %197 = add i32 %196, 132 %198 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %197) %199 = shl i32 %192, 4 %200 = add i32 %199, 136 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = fmul float %57, %195 %203 = fmul float %58, %198 %204 = fadd float %203, %202 %205 = fmul float %59, %201 %206 = fadd float %204, %205 %207 = fmul float %166, %43 %208 = fmul float %186, %43 %209 = fmul float %206, %43 %210 = fcmp olt float 0.000000e+00, %44 %211 = sext i1 %210 to i32 %212 = bitcast i32 %211 to float %213 = bitcast float %212 to i32 %214 = icmp ne i32 %213, 0 br i1 %214, label %IF, label %ENDIF IF: ; preds = %main_body %215 = fmul float 3.000000e+00, %51 %216 = fptosi float %215 to i32 %217 = bitcast i32 %216 to float %218 = bitcast float %217 to i32 %219 = shl i32 %218, 4 %220 = add i32 %219, 128 %221 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %220) %222 = shl i32 %218, 4 %223 = add i32 %222, 132 %224 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %223) %225 = shl i32 %218, 4 %226 = add i32 %225, 136 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = shl i32 %218, 4 %229 = add i32 %228, 140 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = fmul float %68, %221 %232 = fmul float %70, %224 %233 = fadd float %231, %232 %234 = fmul float %72, %227 %235 = fadd float %233, %234 %236 = fmul float %74, %230 %237 = fadd float %235, %236 %238 = fptosi float %215 to i32 %239 = bitcast i32 %238 to float %240 = bitcast float %239 to i32 %241 = add i32 1, %240 %242 = bitcast i32 %241 to float %243 = bitcast float %242 to i32 %244 = shl i32 %243, 4 %245 = add i32 %244, 128 %246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %245) %247 = shl i32 %243, 4 %248 = add i32 %247, 132 %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %248) %250 = shl i32 %243, 4 %251 = add i32 %250, 136 %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %251) %253 = shl i32 %243, 4 %254 = add i32 %253, 140 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %68, %246 %257 = fmul float %70, %249 %258 = fadd float %256, %257 %259 = fmul float %72, %252 %260 = fadd float %258, %259 %261 = fmul float %74, %255 %262 = fadd float %260, %261 %263 = fptosi float %215 to i32 %264 = bitcast i32 %263 to float %265 = bitcast float %264 to i32 %266 = add i32 2, %265 %267 = bitcast i32 %266 to float %268 = bitcast float %267 to i32 %269 = shl i32 %268, 4 %270 = add i32 %269, 128 %271 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %270) %272 = shl i32 %268, 4 %273 = add i32 %272, 132 %274 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %273) %275 = shl i32 %268, 4 %276 = add i32 %275, 136 %277 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %276) %278 = shl i32 %268, 4 %279 = add i32 %278, 140 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = fmul float %68, %271 %282 = fmul float %70, %274 %283 = fadd float %281, %282 %284 = fmul float %72, %277 %285 = fadd float %283, %284 %286 = fmul float %74, %280 %287 = fadd float %285, %286 %288 = fmul float %44, %237 %289 = fadd float %288, %147 %290 = fmul float %44, %262 %291 = fadd float %290, %148 %292 = fmul float %44, %287 %293 = fadd float %292, %149 %294 = fptosi float %215 to i32 %295 = bitcast i32 %294 to float %296 = bitcast float %295 to i32 %297 = shl i32 %296, 4 %298 = add i32 %297, 128 %299 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %298) %300 = shl i32 %296, 4 %301 = add i32 %300, 132 %302 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %301) %303 = shl i32 %296, 4 %304 = add i32 %303, 136 %305 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %304) %306 = fmul float %57, %299 %307 = fmul float %58, %302 %308 = fadd float %307, %306 %309 = fmul float %59, %305 %310 = fadd float %308, %309 %311 = fptosi float %215 to i32 %312 = bitcast i32 %311 to float %313 = bitcast float %312 to i32 %314 = add i32 1, %313 %315 = bitcast i32 %314 to float %316 = bitcast float %315 to i32 %317 = shl i32 %316, 4 %318 = add i32 %317, 128 %319 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %318) %320 = shl i32 %316, 4 %321 = add i32 %320, 132 %322 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %321) %323 = shl i32 %316, 4 %324 = add i32 %323, 136 %325 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %324) %326 = fmul float %57, %319 %327 = fmul float %58, %322 %328 = fadd float %327, %326 %329 = fmul float %59, %325 %330 = fadd float %328, %329 %331 = fptosi float %215 to i32 %332 = bitcast i32 %331 to float %333 = bitcast float %332 to i32 %334 = add i32 2, %333 %335 = bitcast i32 %334 to float %336 = bitcast float %335 to i32 %337 = shl i32 %336, 4 %338 = add i32 %337, 128 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = shl i32 %336, 4 %341 = add i32 %340, 132 %342 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %341) %343 = shl i32 %336, 4 %344 = add i32 %343, 136 %345 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %344) %346 = fmul float %57, %339 %347 = fmul float %58, %342 %348 = fadd float %347, %346 %349 = fmul float %59, %345 %350 = fadd float %348, %349 %351 = fmul float %44, %310 %352 = fadd float %351, %207 %353 = fmul float %44, %330 %354 = fadd float %353, %208 %355 = fmul float %44, %350 %356 = fadd float %355, %209 %357 = fcmp olt float 0.000000e+00, %45 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 br i1 %361, label %IF66, label %ENDIF ENDIF: ; preds = %IF66, %IF, %main_body %temp8.0 = phi float [ %147, %main_body ], [ %466, %IF66 ], [ %289, %IF ] %temp9.0 = phi float [ %148, %main_body ], [ %468, %IF66 ], [ %291, %IF ] %temp10.0 = phi float [ %149, %main_body ], [ %470, %IF66 ], [ %293, %IF ] %temp12.0 = phi float [ %207, %main_body ], [ %529, %IF66 ], [ %352, %IF ] %temp13.0 = phi float [ %208, %main_body ], [ %531, %IF66 ], [ %354, %IF ] %temp14.0 = phi float [ %209, %main_body ], [ %533, %IF66 ], [ %356, %IF ] %362 = fmul float %temp9.0, %20 %363 = fmul float %temp9.0, %21 %364 = fmul float %temp9.0, %22 %365 = fmul float %temp9.0, %23 %366 = fmul float %temp8.0, %16 %367 = fadd float %366, %362 %368 = fmul float %temp8.0, %17 %369 = fadd float %368, %363 %370 = fmul float %temp8.0, %18 %371 = fadd float %370, %364 %372 = fmul float %temp8.0, %19 %373 = fadd float %372, %365 %374 = fmul float %temp10.0, %24 %375 = fadd float %374, %367 %376 = fmul float %temp10.0, %25 %377 = fadd float %376, %369 %378 = fmul float %temp10.0, %26 %379 = fadd float %378, %371 %380 = fmul float %temp10.0, %27 %381 = fadd float %380, %373 %382 = fadd float %375, %28 %383 = fadd float %377, %29 %384 = fadd float %379, %30 %385 = fadd float %381, %31 %386 = fsub float -0.000000e+00, %13 %387 = fadd float %temp8.0, %386 %388 = fsub float -0.000000e+00, %14 %389 = fadd float %temp9.0, %388 %390 = fsub float -0.000000e+00, %15 %391 = fadd float %temp10.0, %390 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp12.0, float %temp13.0, float %temp14.0, float %64) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %387, float %389, float %391, float %65) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %382, float %383, float %385, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %382, float %383, float %384, float %385) ret void IF66: ; preds = %IF %392 = fmul float 3.000000e+00, %52 %393 = fptosi float %392 to i32 %394 = bitcast i32 %393 to float %395 = bitcast float %394 to i32 %396 = shl i32 %395, 4 %397 = add i32 %396, 128 %398 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %397) %399 = shl i32 %395, 4 %400 = add i32 %399, 132 %401 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %400) %402 = shl i32 %395, 4 %403 = add i32 %402, 136 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = shl i32 %395, 4 %406 = add i32 %405, 140 %407 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %406) %408 = fmul float %68, %398 %409 = fmul float %70, %401 %410 = fadd float %408, %409 %411 = fmul float %72, %404 %412 = fadd float %410, %411 %413 = fmul float %74, %407 %414 = fadd float %412, %413 %415 = fptosi float %392 to i32 %416 = bitcast i32 %415 to float %417 = bitcast float %416 to i32 %418 = add i32 1, %417 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 128 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = shl i32 %420, 4 %425 = add i32 %424, 132 %426 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %425) %427 = shl i32 %420, 4 %428 = add i32 %427, 136 %429 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %428) %430 = shl i32 %420, 4 %431 = add i32 %430, 140 %432 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %431) %433 = fmul float %68, %423 %434 = fmul float %70, %426 %435 = fadd float %433, %434 %436 = fmul float %72, %429 %437 = fadd float %435, %436 %438 = fmul float %74, %432 %439 = fadd float %437, %438 %440 = fptosi float %392 to i32 %441 = bitcast i32 %440 to float %442 = bitcast float %441 to i32 %443 = add i32 2, %442 %444 = bitcast i32 %443 to float %445 = bitcast float %444 to i32 %446 = shl i32 %445, 4 %447 = add i32 %446, 128 %448 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %447) %449 = shl i32 %445, 4 %450 = add i32 %449, 132 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = shl i32 %445, 4 %453 = add i32 %452, 136 %454 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %453) %455 = shl i32 %445, 4 %456 = add i32 %455, 140 %457 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %456) %458 = fmul float %68, %448 %459 = fmul float %70, %451 %460 = fadd float %458, %459 %461 = fmul float %72, %454 %462 = fadd float %460, %461 %463 = fmul float %74, %457 %464 = fadd float %462, %463 %465 = fmul float %45, %414 %466 = fadd float %465, %289 %467 = fmul float %45, %439 %468 = fadd float %467, %291 %469 = fmul float %45, %464 %470 = fadd float %469, %293 %471 = fptosi float %392 to i32 %472 = bitcast i32 %471 to float %473 = bitcast float %472 to i32 %474 = shl i32 %473, 4 %475 = add i32 %474, 128 %476 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %475) %477 = shl i32 %473, 4 %478 = add i32 %477, 132 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = shl i32 %473, 4 %481 = add i32 %480, 136 %482 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %481) %483 = fmul float %57, %476 %484 = fmul float %58, %479 %485 = fadd float %484, %483 %486 = fmul float %59, %482 %487 = fadd float %485, %486 %488 = fptosi float %392 to i32 %489 = bitcast i32 %488 to float %490 = bitcast float %489 to i32 %491 = add i32 1, %490 %492 = bitcast i32 %491 to float %493 = bitcast float %492 to i32 %494 = shl i32 %493, 4 %495 = add i32 %494, 128 %496 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %495) %497 = shl i32 %493, 4 %498 = add i32 %497, 132 %499 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %498) %500 = shl i32 %493, 4 %501 = add i32 %500, 136 %502 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %501) %503 = fmul float %57, %496 %504 = fmul float %58, %499 %505 = fadd float %504, %503 %506 = fmul float %59, %502 %507 = fadd float %505, %506 %508 = fptosi float %392 to i32 %509 = bitcast i32 %508 to float %510 = bitcast float %509 to i32 %511 = add i32 2, %510 %512 = bitcast i32 %511 to float %513 = bitcast float %512 to i32 %514 = shl i32 %513, 4 %515 = add i32 %514, 128 %516 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %515) %517 = shl i32 %513, 4 %518 = add i32 %517, 132 %519 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %518) %520 = shl i32 %513, 4 %521 = add i32 %520, 136 %522 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %521) %523 = fmul float %57, %516 %524 = fmul float %58, %519 %525 = fadd float %524, %523 %526 = fmul float %59, %522 %527 = fadd float %525, %526 %528 = fmul float %45, %487 %529 = fadd float %528, %352 %530 = fmul float %45, %507 %531 = fadd float %530, %354 %532 = fmul float %45, %527 %533 = fadd float %532, %356 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v6, s10, v0 ; 4A0C000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[18:21], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011206 V_MOV_B32_e32 v0, 3.000000e+00 ; 7E0002FF 40400000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v18, v0 ; 10000112 V_CVT_I32_F32_e32 v2, v0 ; 7E041100 V_LSHLREV_B32_e32 v3, 4, v2 ; 34060484 V_ADD_I32_e32 v0, 0x80, v3 ; 4A0006FF 00000080 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v5, s[0:3] + v0 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000500 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[14:17], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010E06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v14, v5 ; 10000B0E V_ADD_I32_e32 v1, 0x84, v3 ; 4A0206FF 00000084 BUFFER_LOAD_DWORD v7, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000701 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v15, v7, v0, 0, 0 ; D2820000 04020F0F V_ADD_I32_e32 v1, 0x88, v3 ; 4A0206FF 00000088 BUFFER_LOAD_DWORD v8, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000801 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v16, v8, v0, 0, 0 ; D2820000 04021110 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[10:13], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010A06 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v0, v10 ; 10001500 V_ADD_I32_e32 v1, 2, v2 ; 4A020482 V_LSHLREV_B32_e32 v9, 4, v1 ; 34120284 V_ADD_I32_e32 v1, 0x80, v9 ; 4A0212FF 00000080 BUFFER_LOAD_DWORD v26, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001A01 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v1, v14, v26 ; 1002350E V_ADD_I32_e32 v4, 0x84, v9 ; 4A0812FF 00000084 BUFFER_LOAD_DWORD v27, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001B04 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v15, v27, v1, 0, 0 ; D2820001 0406370F V_ADD_I32_e32 v4, 0x88, v9 ; 4A0812FF 00000088 BUFFER_LOAD_DWORD v28, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001C04 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v16, v28, v1, 0, 0 ; D2820001 04063910 V_MUL_F32_e32 v1, v1, v10 ; 10021501 V_ADD_I32_e32 v2, 1, v2 ; 4A040481 V_LSHLREV_B32_e32 v29, 4, v2 ; 343A0484 V_ADD_I32_e32 v2, 0x80, v29 ; 4A043AFF 00000080 BUFFER_LOAD_DWORD v30, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001E02 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v14, v30 ; 10043D0E V_ADD_I32_e32 v4, 0x84, v29 ; 4A083AFF 00000084 BUFFER_LOAD_DWORD v31, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001F04 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v15, v31, v2, 0, 0 ; D2820002 040A3F0F V_ADD_I32_e32 v4, 0x88, v29 ; 4A083AFF 00000088 BUFFER_LOAD_DWORD v32, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002004 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v16, v32, v2, 0, 0 ; D2820002 040A4110 V_MUL_F32_e32 v4, v2, v10 ; 10081502 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[22:25], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011606 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v23, v7 ; 10040F17 V_MAD_F32 v2, v22, v5, v2, 0, 0 ; D2820002 040A0B16 V_MAD_F32 v2, v24, v8, v2, 0, 0 ; D2820002 040A1118 V_ADD_I32_e32 v3, 0x8c, v3 ; 4A0606FF 0000008C BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v2, v2, v3 ; 06040702 V_MUL_F32_e32 v2, v2, v10 ; 10041502 V_MUL_F32_e32 v3, v23, v27 ; 10063717 V_MAD_F32 v3, v22, v26, v3, 0, 0 ; D2820003 040E3516 V_MAD_F32 v3, v24, v28, v3, 0, 0 ; D2820003 040E3918 V_ADD_I32_e32 v5, 0x8c, v9 ; 4A0A12FF 0000008C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v3, v3, v5 ; 06060B03 V_MUL_F32_e32 v3, v3, v10 ; 10061503 V_MUL_F32_e32 v5, v23, v31 ; 100A3F17 V_MAD_F32 v5, v22, v30, v5, 0, 0 ; D2820005 04163D16 V_MAD_F32 v5, v24, v32, v5, 0, 0 ; D2820005 04164118 V_ADD_I32_e32 v7, 0x8c, v29 ; 4A0E3AFF 0000008C BUFFER_LOAD_DWORD v7, s[0:3] + v7 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000707 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v5, v5, v7 ; 060A0F05 V_MUL_F32_e32 v5, v5, v10 ; 100A1505 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x10 ; C0820910 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v6] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010606 V_CMP_GT_F32_e64 s[4:5], v11, 0.000000e+00, 0, 0 ; D0080004 0001010B V_MOV_B32_e32 v26, 1.000000e+00 ; 7E3402F2 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[4:5], s[4:5] ; BE842404 S_XOR_B64 s[4:5], exec, s[4:5] ; 8984047E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_MOV_B32_e32 v27, 3.000000e+00 ; 7E3602FF 40400000 V_MUL_F32_e32 v27, v19, v27 ; 10363713 V_CVT_I32_F32_e32 v27, v27 ; 7E36111B V_LSHLREV_B32_e32 v28, 4, v27 ; 34383684 V_ADD_I32_e32 v29, 0x84, v28 ; 4A3A38FF 00000084 BUFFER_LOAD_DWORD v29, s[0:3] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001D1D V_ADD_I32_e32 v30, 0x80, v28 ; 4A3C38FF 00000080 BUFFER_LOAD_DWORD v30, s[0:3] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001E1E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v31, v14, v30 ; 103E3D0E V_MAD_F32 v31, v15, v29, v31, 0, 0 ; D282001F 047E3B0F V_ADD_I32_e32 v32, 0x88, v28 ; 4A4038FF 00000088 BUFFER_LOAD_DWORD v32, s[0:3] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002020 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v31, v16, v32, v31, 0, 0 ; D282001F 047E4110 V_MAD_F32 v0, v11, v31, v0, 0, 0 ; D2820000 04023F0B V_ADD_I32_e32 v31, 2, v27 ; 4A3E3682 V_LSHLREV_B32_e32 v31, 4, v31 ; 343E3E84 V_ADD_I32_e32 v33, 0x84, v31 ; 4A423EFF 00000084 BUFFER_LOAD_DWORD v33, s[0:3] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002121 V_ADD_I32_e32 v34, 0x80, v31 ; 4A443EFF 00000080 BUFFER_LOAD_DWORD v34, s[0:3] + v34 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002222 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v35, v14, v34 ; 1046450E V_MAD_F32 v35, v15, v33, v35, 0, 0 ; D2820023 048E430F V_ADD_I32_e32 v36, 0x88, v31 ; 4A483EFF 00000088 BUFFER_LOAD_DWORD v36, s[0:3] + v36 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002424 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v35, v16, v36, v35, 0, 0 ; D2820023 048E4910 V_MAD_F32 v1, v11, v35, v1, 0, 0 ; D2820001 0406470B V_ADD_I32_e32 v27, 1, v27 ; 4A363681 V_LSHLREV_B32_e32 v27, 4, v27 ; 34363684 V_ADD_I32_e32 v35, 0x84, v27 ; 4A4636FF 00000084 BUFFER_LOAD_DWORD v35, s[0:3] + v35 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002323 V_ADD_I32_e32 v37, 0x80, v27 ; 4A4A36FF 00000080 BUFFER_LOAD_DWORD v37, s[0:3] + v37 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002525 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v38, v14, v37 ; 104C4B0E V_MAD_F32 v38, v15, v35, v38, 0, 0 ; D2820026 049A470F V_ADD_I32_e32 v39, 0x88, v27 ; 4A4E36FF 00000088 BUFFER_LOAD_DWORD v39, s[0:3] + v39 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002727 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v38, v16, v39, v38, 0, 0 ; D2820026 049A4F10 V_MAD_F32 v4, v11, v38, v4, 0, 0 ; D2820004 04124D0B V_MUL_F32_e32 v29, v23, v29 ; 103A3B17 V_MAD_F32 v29, v22, v30, v29, 0, 0 ; D282001D 04763D16 V_MAD_F32 v29, v24, v32, v29, 0, 0 ; D282001D 04764118 V_ADD_I32_e32 v28, 0x8c, v28 ; 4A3838FF 0000008C BUFFER_LOAD_DWORD v28, s[0:3] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001C1C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v26, v28, v29, 0, 0 ; D282001C 0476391A V_MAD_F32 v2, v11, v28, v2, 0, 0 ; D2820002 040A390B V_MUL_F32_e32 v28, v23, v33 ; 10384317 V_MAD_F32 v28, v22, v34, v28, 0, 0 ; D282001C 04724516 V_MAD_F32 v28, v24, v36, v28, 0, 0 ; D282001C 04724918 V_ADD_I32_e32 v29, 0x8c, v31 ; 4A3A3EFF 0000008C BUFFER_LOAD_DWORD v29, s[0:3] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001D1D S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v28, v26, v29, v28, 0, 0 ; D282001C 04723B1A V_MAD_F32 v3, v11, v28, v3, 0, 0 ; D2820003 040E390B V_MUL_F32_e32 v28, v23, v35 ; 10384717 V_MAD_F32 v28, v22, v37, v28, 0, 0 ; D282001C 04724B16 V_MAD_F32 v28, v24, v39, v28, 0, 0 ; D282001C 04724F18 V_ADD_I32_e32 v27, 0x8c, v27 ; 4A3636FF 0000008C BUFFER_LOAD_DWORD v27, s[0:3] + v27 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001B1B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v27, v26, v27, v28, 0, 0 ; D282001B 0472371A V_MAD_F32 v5, v11, v27, v5, 0, 0 ; D2820005 0416370B V_CMP_GT_F32_e64 s[6:7], v12, 0.000000e+00, 0, 0 ; D0080006 0001010C S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E S_CBRANCH_EXECZ BB0_2 ; BF880000 V_MOV_B32_e32 v27, 3.000000e+00 ; 7E3602FF 40400000 V_MUL_F32_e32 v18, v20, v27 ; 10243714 V_CVT_I32_F32_e32 v18, v18 ; 7E241112 V_LSHLREV_B32_e32 v19, 4, v18 ; 34262484 V_ADD_I32_e32 v20, 0x84, v19 ; 4A2826FF 00000084 BUFFER_LOAD_DWORD v20, s[0:3] + v20 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001414 V_ADD_I32_e32 v21, 0x80, v19 ; 4A2A26FF 00000080 BUFFER_LOAD_DWORD v21, s[0:3] + v21 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001515 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 V_MUL_F32_e32 v27, v14, v21 ; 10362B0E V_MAD_F32 v27, v15, v20, v27, 0, 0 ; D282001B 046E290F V_ADD_I32_e32 v28, 0x88, v19 ; 4A3826FF 00000088 BUFFER_LOAD_DWORD v28, s[0:3] + v28 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001C1C S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v27, v16, v28, v27, 0, 0 ; D282001B 046E3910 V_MAD_F32 v0, v12, v27, v0, 0, 0 ; D2820000 0402370C V_ADD_I32_e32 v27, 2, v18 ; 4A362482 V_LSHLREV_B32_e32 v27, 4, v27 ; 34363684 V_ADD_I32_e32 v29, 0x84, v27 ; 4A3A36FF 00000084 BUFFER_LOAD_DWORD v29, s[0:3] + v29 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001D1D V_ADD_I32_e32 v30, 0x80, v27 ; 4A3C36FF 00000080 BUFFER_LOAD_DWORD v30, s[0:3] + v30 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001E1E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v31, v14, v30 ; 103E3D0E V_MAD_F32 v31, v15, v29, v31, 0, 0 ; D282001F 047E3B0F V_ADD_I32_e32 v32, 0x88, v27 ; 4A4036FF 00000088 BUFFER_LOAD_DWORD v32, s[0:3] + v32 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002020 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v31, v16, v32, v31, 0, 0 ; D282001F 047E4110 V_MAD_F32 v1, v12, v31, v1, 0, 0 ; D2820001 04063F0C V_ADD_I32_e32 v18, 1, v18 ; 4A242481 V_LSHLREV_B32_e32 v18, 4, v18 ; 34242484 V_ADD_I32_e32 v31, 0x84, v18 ; 4A3E24FF 00000084 BUFFER_LOAD_DWORD v31, s[0:3] + v31 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80001F1F V_ADD_I32_e32 v33, 0x80, v18 ; 4A4224FF 00000080 BUFFER_LOAD_DWORD v33, s[0:3] + v33 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002121 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v34, v14, v33 ; 1044430E V_MAD_F32 v34, v15, v31, v34, 0, 0 ; D2820022 048A3F0F V_ADD_I32_e32 v35, 0x88, v18 ; 4A4624FF 00000088 BUFFER_LOAD_DWORD v35, s[0:3] + v35 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80002323 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v16, v35, v34, 0, 0 ; D282000E 048A4710 V_MAD_F32 v4, v12, v14, v4, 0, 0 ; D2820004 04121D0C V_MUL_F32_e32 v14, v23, v20 ; 101C2917 V_MAD_F32 v14, v22, v21, v14, 0, 0 ; D282000E 043A2B16 V_MAD_F32 v14, v24, v28, v14, 0, 0 ; D282000E 043A3918 V_ADD_I32_e32 v15, 0x8c, v19 ; 4A1E26FF 0000008C BUFFER_LOAD_DWORD v15, s[0:3] + v15 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000F0F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v26, v15, v14, 0, 0 ; D282000E 043A1F1A V_MAD_F32 v2, v12, v14, v2, 0, 0 ; D2820002 040A1D0C V_MUL_F32_e32 v14, v23, v29 ; 101C3B17 V_MAD_F32 v14, v22, v30, v14, 0, 0 ; D282000E 043A3D16 V_MAD_F32 v14, v24, v32, v14, 0, 0 ; D282000E 043A4118 V_ADD_I32_e32 v15, 0x8c, v27 ; 4A1E36FF 0000008C BUFFER_LOAD_DWORD v15, s[0:3] + v15 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000F0F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v26, v15, v14, 0, 0 ; D282000E 043A1F1A V_MAD_F32 v3, v12, v14, v3, 0, 0 ; D2820003 040E1D0C V_MUL_F32_e32 v14, v23, v31 ; 101C3F17 V_MAD_F32 v14, v22, v33, v14, 0, 0 ; D282000E 043A4316 V_MAD_F32 v14, v24, v35, v14, 0, 0 ; D282000E 043A4718 V_ADD_I32_e32 v15, 0x8c, v18 ; 4A1E24FF 0000008C BUFFER_LOAD_DWORD v15, s[0:3] + v15 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000F0F S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v14, v26, v15, v14, 0, 0 ; D282000E 043A1F1A V_MAD_F32 v5, v12, v14, v5, 0, 0 ; D2820005 04161D0C S_OR_B64 exec, exec, s[6:7] ; 88FE067E S_OR_B64 exec, exec, s[4:5] ; 88FE047E S_BUFFER_LOAD_DWORD s6, s[0:3], 0x1f ; C203011F S_BUFFER_LOAD_DWORD s7, s[0:3], 0x1e ; C203811E S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1d ; C204011D S_BUFFER_LOAD_DWORD s9, s[0:3], 0x1c ; C204811C S_BUFFER_LOAD_DWORD s10, s[0:3], 0x1b ; C205011B S_BUFFER_LOAD_DWORD s11, s[0:3], 0x1a ; C205811A S_BUFFER_LOAD_DWORD s12, s[0:3], 0x19 ; C2060119 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x18 ; C2068118 S_BUFFER_LOAD_DWORD s14, s[0:3], 0x17 ; C2070117 S_BUFFER_LOAD_DWORD s15, s[0:3], 0x16 ; C2078116 S_BUFFER_LOAD_DWORD s16, s[0:3], 0x15 ; C2080115 S_BUFFER_LOAD_DWORD s17, s[0:3], 0x14 ; C2088114 S_BUFFER_LOAD_DWORD s18, s[0:3], 0x13 ; C2090113 S_BUFFER_LOAD_DWORD s19, s[0:3], 0x12 ; C2098112 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x11 ; C20A0111 S_BUFFER_LOAD_DWORD s21, s[0:3], 0x10 ; C20A8110 S_BUFFER_LOAD_DWORD s22, s[0:3], 0xe ; C20B010E S_BUFFER_LOAD_DWORD s23, s[0:3], 0xd ; C20B810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v10, s6 ; 7E140206 V_MOV_B32_e32 v11, s7 ; 7E160207 V_MOV_B32_e32 v12, s8 ; 7E180208 V_MOV_B32_e32 v13, s9 ; 7E1A0209 V_MOV_B32_e32 v14, s10 ; 7E1C020A V_MOV_B32_e32 v15, s11 ; 7E1E020B V_MOV_B32_e32 v16, s12 ; 7E20020C V_MOV_B32_e32 v17, s13 ; 7E22020D V_MOV_B32_e32 v18, s14 ; 7E24020E V_MOV_B32_e32 v19, s15 ; 7E26020F V_MOV_B32_e32 v20, s16 ; 7E280210 V_MOV_B32_e32 v21, s17 ; 7E2A0211 V_MOV_B32_e32 v22, s18 ; 7E2C0212 V_MOV_B32_e32 v23, s19 ; 7E2E0213 V_MOV_B32_e32 v24, s20 ; 7E300214 V_MOV_B32_e32 v25, s21 ; 7E320215 V_MOV_B32_e32 v26, s22 ; 7E340216 V_MOV_B32_e32 v27, s23 ; 7E360217 V_MOV_B32_e32 v28, s24 ; 7E380218 EXP 15, 32, 0, 0, 0, v0, v4, v1, v6 ; F800020F 06010400 S_WAITCNT expcnt(0) ; BF8C070F V_SUB_F32_e32 v0, v3, v26 ; 08003503 V_SUB_F32_e32 v1, v5, v27 ; 08023705 V_SUB_F32_e32 v4, v2, v28 ; 08083902 EXP 15, 33, 0, 0, 0, v4, v1, v0, v7 ; F800021F 07000104 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v0, 1.000000e+00 ; 7E0002F2 EXP 15, 34, 0, 0, 0, v2, v5, v3, v0 ; F800022F 00030502 V_MUL_F32_e32 v1, v5, v18 ; 10022505 V_MAD_F32 v1, v2, v22, v1, 0, 0 ; D2820001 04062D02 V_MAD_F32 v1, v3, v14, v1, 0, 0 ; D2820001 04061D03 V_ADD_F32_e32 v1, v1, v10 ; 06021501 V_MUL_F32_e32 v4, v5, v20 ; 10082905 V_MAD_F32 v4, v2, v24, v4, 0, 0 ; D2820004 04123102 V_MAD_F32 v4, v3, v16, v4, 0, 0 ; D2820004 04122103 V_ADD_F32_e32 v4, v4, v12 ; 06081904 V_MUL_F32_e32 v6, v5, v21 ; 100C2B05 V_MAD_F32 v6, v2, v25, v6, 0, 0 ; D2820006 041A3302 V_MAD_F32 v6, v3, v17, v6, 0, 0 ; D2820006 041A2303 V_ADD_F32_e32 v6, v6, v13 ; 060C1B06 EXP 15, 35, 0, 0, 0, v6, v4, v1, v0 ; F800023F 00010406 S_WAITCNT expcnt(0) ; BF8C070F V_MUL_F32_e32 v0, v5, v19 ; 10002705 V_MAD_F32 v0, v2, v23, v0, 0, 0 ; D2820000 04022F02 V_MAD_F32 v0, v3, v15, v0, 0, 0 ; D2820000 04021F03 V_ADD_F32_e32 v0, v0, v11 ; 06001700 EXP 15, 12, 0, 1, 0, v6, v4, v0, v1 ; F80008CF 01000406 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..5] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.0000, 0.3300, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[3], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[1].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: RCP TEMP[2].x, IN[3].zzzz 16: MUL TEMP[2].y, TEMP[2].xxxx, IN[3].yyyy 17: MAD TEMP[2].x, TEMP[2].yyyy, IMM[0].zzzz, IMM[0].zzzz 18: MOV TEMP[2].x, TEMP[2].xxxx 19: MOV TEMP[2].y, CONST[1].wwww 20: MOV TEMP[3].xy, TEMP[2].xyyy 21: TEX TEMP[3].x, TEMP[3], SAMP[5], 2D 22: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 23: MOV TEMP[0].x, TEMP[1].xxxx 24: MOV TEMP[3].w, IMM[0].wwww 25: MOV TEMP[3].x, IN[1].xxxx 26: MOV TEMP[3].y, IN[1].yyyy 27: MOV TEMP[3].z, IN[1].zzzz 28: DP4 TEMP[4].x, TEMP[3], TEMP[3] 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 31: MOV TEMP[4].w, IMM[0].wwww 32: MOV TEMP[4].x, IN[0].xxxx 33: MOV TEMP[4].y, IN[0].yyyy 34: MOV TEMP[4].z, IN[0].zzzz 35: DP4 TEMP[5].x, TEMP[4], TEMP[4] 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 38: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[4].xyzz 39: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 40: MAD TEMP[3].yzw, TEMP[4].xxyz, -TEMP[5].yyyy, TEMP[3].xxyz 41: MOV TEMP[4].xyz, TEMP[4].xyzz 42: TEX TEMP[4], TEMP[4], SAMP[2], CUBE 43: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 44: MOV TEMP[5].xyz, TEMP[3].yzww 45: TEX TEMP[5], TEMP[5], SAMP[4], CUBE 46: MOV TEMP[6].w, TEMP[5].wwww 47: MOV TEMP[3].xyz, TEMP[3].yzww 48: TEX TEMP[3].xyz, TEMP[3], SAMP[3], CUBE 49: MUL TEMP[7].yzw, TEMP[3].xxyz, TEMP[3].xxyz 50: MOV TEMP[0].yzw, TEMP[7].zyzw 51: MAD TEMP[5].xyz, TEMP[5], TEMP[5], -TEMP[7].yzww 52: MOV TEMP[3].x, IN[0].wwww 53: MOV TEMP[3].y, IN[1].wwww 54: MOV TEMP[7].xy, TEMP[3].xyyy 55: TEX TEMP[7], TEMP[7], SAMP[1], 2D 56: MOV TEMP[8].xy, TEMP[3].xyyy 57: TEX TEMP[8], TEMP[8], SAMP[0], 2D 58: MUL TEMP[3], TEMP[8], TEMP[8] 59: MUL TEMP[7], TEMP[7], TEMP[7] 60: MUL TEMP[8].w, TEMP[7].wwww, TEMP[7].wwww 61: MUL TEMP[8], TEMP[8].wwww, TEMP[8].wwww 62: MOV_SAT TEMP[8], TEMP[8] 63: MOV TEMP[2].w, TEMP[8].wwww 64: MAD TEMP[5].yzw, TEMP[8].wwww, TEMP[5].xxyz, TEMP[0] 65: MOV TEMP[0].yzw, TEMP[5].zyzw 66: MUL TEMP[5].yzw, TEMP[0], TEMP[7].xxyz 67: MOV TEMP[0].yzw, TEMP[5].zyzw 68: MUL TEMP[5].xyz, CONST[4], CONST[4] 69: MOV TEMP[6].xyz, TEMP[5].xyzx 70: MOV TEMP[7].xyz, CONST[4].xyzx 71: MAD TEMP[5].xyz, TEMP[7], -TEMP[7], IMM[0].yyyy 72: MOV TEMP[7].xyz, TEMP[5].xyzx 73: MAD TEMP[5].xyz, TEMP[3].wwww, TEMP[7], TEMP[6] 74: MOV TEMP[6].xyz, TEMP[5].xyzx 75: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 76: MAD TEMP[0].yzw, TEMP[3].xxyz, TEMP[4].xxyz, TEMP[0] 77: MAX TEMP[3].x, TEMP[0].yyyy, IMM[1].xxxx 78: RSQ TEMP[3].x, TEMP[3].xxxx 79: RCP TEMP[2].x, TEMP[3].xxxx 80: MAX TEMP[3].x, TEMP[0].zzzz, IMM[1].xxxx 81: RSQ TEMP[3].x, TEMP[3].xxxx 82: MAX TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 83: RSQ TEMP[4].x, TEMP[0].xxxx 84: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 85: CMP TEMP[4].x, -TEMP[0].xxxx, TEMP[4].xxxx, IMM[0].wwww 86: MOV TEMP[2].z, TEMP[4].xxxx 87: RCP TEMP[0].x, TEMP[3].xxxx 88: MOV TEMP[2].y, TEMP[0].xxxx 89: ADD TEMP[0].yzw, -TEMP[2].xxyz, CONST[2].xxyz 90: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[2] 91: MOV TEMP[0].xyz, TEMP[0].xyzx 92: ADD TEMP[1].x, -CONST[0].xxxx, IN[2].zzzz 93: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 94: UIF TEMP[1].xxxx :0 95: MOV TEMP[1].x, IMM[0].yyyy 96: ELSE :0 97: MOV TEMP[1].x, IMM[1].yyyy 98: ENDIF 99: ADD TEMP[2].y, -CONST[5].xxxx, CONST[5].yyyy 100: MAD TEMP[2].y, CONST[5].zzzz, TEMP[2].yyyy, CONST[5].xxxx 101: MUL TEMP[1].w, TEMP[2].yyyy, TEMP[1].xxxx 102: MOV TEMP[0].w, TEMP[1].wwww 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %40 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %49 = load <8 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %51 = load <4 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %53 = load <8 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %55 = load <4 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %57 = load <8 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %59 = load <4 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %61 = load <8 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %63 = load <4 x i32> addrspace(2)* %62, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %77 = fsub float -0.000000e+00, %31 %78 = fadd float %77, %72 %79 = fsub float -0.000000e+00, %32 %80 = fadd float %79, %73 %81 = fsub float -0.000000e+00, %33 %82 = fadd float %81, %74 %83 = fmul float %82, %25 %84 = fmul float %83, 0x3FF7154CA0000000 %85 = call float @llvm.AMDIL.exp.(float %84) %86 = fsub float -0.000000e+00, %85 %87 = fadd float %86, 1.000000e+00 %88 = fmul float %78, %78 %89 = fmul float %80, %80 %90 = fadd float %89, %88 %91 = fmul float %82, %82 %92 = fadd float %90, %91 %93 = fdiv float 1.000000e+00, %82 %94 = fmul float %92, %26 %95 = fmul float %87, %94 %96 = fmul float %93, %95 %97 = fmul float %96, 0x3FF7154CA0000000 %98 = call float @llvm.AMDIL.exp.(float %97) %99 = call float @llvm.AMDIL.clamp.(float %98, float 0.000000e+00, float 1.000000e+00) %100 = fsub float -0.000000e+00, %99 %101 = fadd float %100, 1.000000e+00 %102 = fdiv float 1.000000e+00, %76 %103 = fmul float %102, %75 %104 = fmul float %103, 5.000000e-01 %105 = fadd float %104, 5.000000e-01 %106 = bitcast float %105 to i32 %107 = bitcast float %27 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = bitcast <8 x i32> %61 to <32 x i8> %111 = bitcast <4 x i32> %63 to <16 x i8> %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %110, <16 x i8> %111, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fmul float %101, %113 %115 = fmul float %68, %68 %116 = fmul float %69, %69 %117 = fadd float %115, %116 %118 = fmul float %70, %70 %119 = fadd float %117, %118 %120 = fmul float 0.000000e+00, 0.000000e+00 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = fmul float %68, %122 %124 = fmul float %69, %122 %125 = fmul float %70, %122 %126 = fmul float %64, %64 %127 = fmul float %65, %65 %128 = fadd float %126, %127 %129 = fmul float %66, %66 %130 = fadd float %128, %129 %131 = fmul float 0.000000e+00, 0.000000e+00 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %64, %133 %135 = fmul float %65, %133 %136 = fmul float %66, %133 %137 = fmul float %123, %134 %138 = fmul float %124, %135 %139 = fadd float %138, %137 %140 = fmul float %125, %136 %141 = fadd float %139, %140 %142 = fadd float %141, %141 %143 = fsub float -0.000000e+00, %142 %144 = fmul float %134, %143 %145 = fadd float %144, %123 %146 = fsub float -0.000000e+00, %142 %147 = fmul float %135, %146 %148 = fadd float %147, %124 %149 = fsub float -0.000000e+00, %142 %150 = fmul float %136, %149 %151 = fadd float %150, %125 %152 = insertelement <4 x float> undef, float %134, i32 0 %153 = insertelement <4 x float> %152, float %135, i32 1 %154 = insertelement <4 x float> %153, float %136, i32 2 %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 3 %156 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %155) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = call float @fabs(float %159) %162 = fdiv float 1.000000e+00, %161 %163 = fmul float %157, %162 %164 = fadd float %163, 1.500000e+00 %165 = fmul float %158, %162 %166 = fadd float %165, 1.500000e+00 %167 = bitcast float %166 to i32 %168 = bitcast float %164 to i32 %169 = bitcast float %160 to i32 %170 = insertelement <4 x i32> undef, i32 %167, i32 0 %171 = insertelement <4 x i32> %170, i32 %168, i32 1 %172 = insertelement <4 x i32> %171, i32 %169, i32 2 %173 = insertelement <4 x i32> %172, i32 undef, i32 3 %174 = bitcast <8 x i32> %49 to <32 x i8> %175 = bitcast <4 x i32> %51 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 4) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = fmul float %177, %177 %181 = fmul float %178, %178 %182 = fmul float %179, %179 %183 = insertelement <4 x float> undef, float %145, i32 0 %184 = insertelement <4 x float> %183, float %148, i32 1 %185 = insertelement <4 x float> %184, float %151, i32 2 %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3 %187 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %186) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = extractelement <4 x float> %187, i32 3 %192 = call float @fabs(float %190) %193 = fdiv float 1.000000e+00, %192 %194 = fmul float %188, %193 %195 = fadd float %194, 1.500000e+00 %196 = fmul float %189, %193 %197 = fadd float %196, 1.500000e+00 %198 = bitcast float %197 to i32 %199 = bitcast float %195 to i32 %200 = bitcast float %191 to i32 %201 = insertelement <4 x i32> undef, i32 %198, i32 0 %202 = insertelement <4 x i32> %201, i32 %199, i32 1 %203 = insertelement <4 x i32> %202, i32 %200, i32 2 %204 = insertelement <4 x i32> %203, i32 undef, i32 3 %205 = bitcast <8 x i32> %57 to <32 x i8> %206 = bitcast <4 x i32> %59 to <16 x i8> %207 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 4) %208 = extractelement <4 x float> %207, i32 0 %209 = extractelement <4 x float> %207, i32 1 %210 = extractelement <4 x float> %207, i32 2 %211 = insertelement <4 x float> undef, float %145, i32 0 %212 = insertelement <4 x float> %211, float %148, i32 1 %213 = insertelement <4 x float> %212, float %151, i32 2 %214 = insertelement <4 x float> %213, float %151, i32 3 %215 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %214) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = call float @fabs(float %218) %221 = fdiv float 1.000000e+00, %220 %222 = fmul float %216, %221 %223 = fadd float %222, 1.500000e+00 %224 = fmul float %217, %221 %225 = fadd float %224, 1.500000e+00 %226 = bitcast float %225 to i32 %227 = bitcast float %223 to i32 %228 = bitcast float %219 to i32 %229 = insertelement <4 x i32> undef, i32 %226, i32 0 %230 = insertelement <4 x i32> %229, i32 %227, i32 1 %231 = insertelement <4 x i32> %230, i32 %228, i32 2 %232 = insertelement <4 x i32> %231, i32 undef, i32 3 %233 = bitcast <8 x i32> %53 to <32 x i8> %234 = bitcast <4 x i32> %55 to <16 x i8> %235 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 4) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = fmul float %236, %236 %240 = fmul float %237, %237 %241 = fmul float %238, %238 %242 = fsub float -0.000000e+00, %239 %243 = fmul float %208, %208 %244 = fadd float %243, %242 %245 = fsub float -0.000000e+00, %240 %246 = fmul float %209, %209 %247 = fadd float %246, %245 %248 = fsub float -0.000000e+00, %241 %249 = fmul float %210, %210 %250 = fadd float %249, %248 %251 = bitcast float %67 to i32 %252 = bitcast float %71 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = bitcast <8 x i32> %45 to <32 x i8> %256 = bitcast <4 x i32> %47 to <16 x i8> %257 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %255, <16 x i8> %256, i32 2) %258 = extractelement <4 x float> %257, i32 0 %259 = extractelement <4 x float> %257, i32 1 %260 = extractelement <4 x float> %257, i32 2 %261 = extractelement <4 x float> %257, i32 3 %262 = bitcast float %67 to i32 %263 = bitcast float %71 to i32 %264 = insertelement <2 x i32> undef, i32 %262, i32 0 %265 = insertelement <2 x i32> %264, i32 %263, i32 1 %266 = bitcast <8 x i32> %41 to <32 x i8> %267 = bitcast <4 x i32> %43 to <16 x i8> %268 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = extractelement <4 x float> %268, i32 3 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %258, %258 %278 = fmul float %259, %259 %279 = fmul float %260, %260 %280 = fmul float %261, %261 %281 = fmul float %280, %280 %282 = fmul float %281, %281 %283 = fmul float %281, %281 %284 = fmul float %281, %281 %285 = fmul float %281, %281 %286 = call float @llvm.AMDIL.clamp.(float %282, float 0.000000e+00, float 1.000000e+00) %287 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00) %288 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %289 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %290 = fmul float %289, %244 %291 = fadd float %290, %239 %292 = fmul float %289, %247 %293 = fadd float %292, %240 %294 = fmul float %289, %250 %295 = fadd float %294, %241 %296 = fmul float %291, %277 %297 = fmul float %293, %278 %298 = fmul float %295, %279 %299 = fmul float %34, %34 %300 = fmul float %35, %35 %301 = fmul float %36, %36 %302 = fsub float -0.000000e+00, %34 %303 = fmul float %34, %302 %304 = fadd float %303, 1.000000e+00 %305 = fsub float -0.000000e+00, %35 %306 = fmul float %35, %305 %307 = fadd float %306, 1.000000e+00 %308 = fsub float -0.000000e+00, %36 %309 = fmul float %36, %308 %310 = fadd float %309, 1.000000e+00 %311 = fmul float %276, %304 %312 = fadd float %311, %299 %313 = fmul float %276, %307 %314 = fadd float %313, %300 %315 = fmul float %276, %310 %316 = fadd float %315, %301 %317 = fmul float %273, %312 %318 = fmul float %274, %314 %319 = fmul float %275, %316 %320 = fmul float %317, %180 %321 = fadd float %320, %296 %322 = fmul float %318, %181 %323 = fadd float %322, %297 %324 = fmul float %319, %182 %325 = fadd float %324, %298 %326 = fcmp uge float %321, 0x3E7AD7F2A0000000 %327 = select i1 %326, float %321, float 0x3E7AD7F2A0000000 %328 = call float @llvm.AMDGPU.rsq.clamped.f32(float %327) %329 = fdiv float 1.000000e+00, %328 %330 = fcmp uge float %323, 0x3E7AD7F2A0000000 %331 = select i1 %330, float %323, float 0x3E7AD7F2A0000000 %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fcmp uge float %325, 0x3E7AD7F2A0000000 %334 = select i1 %333, float %325, float 0x3E7AD7F2A0000000 %335 = call float @llvm.AMDGPU.rsq.clamped.f32(float %334) %336 = fmul float %335, %334 %337 = fsub float -0.000000e+00, %334 %338 = call float @llvm.AMDGPU.cndlt(float %337, float %336, float 0.000000e+00) %339 = fdiv float 1.000000e+00, %332 %340 = fsub float -0.000000e+00, %329 %341 = fadd float %340, %28 %342 = fsub float -0.000000e+00, %339 %343 = fadd float %342, %29 %344 = fsub float -0.000000e+00, %338 %345 = fadd float %344, %30 %346 = fmul float %114, %341 %347 = fadd float %346, %329 %348 = fmul float %114, %343 %349 = fadd float %348, %339 %350 = fmul float %114, %345 %351 = fadd float %350, %338 %352 = fsub float -0.000000e+00, %24 %353 = fadd float %352, %74 %354 = fcmp oge float %353, 0.000000e+00 %355 = sext i1 %354 to i32 %356 = bitcast i32 %355 to float %357 = bitcast float %356 to i32 %358 = icmp ne i32 %357, 0 %. = select i1 %358, float 1.000000e+00, float 0x3FD51EB860000000 %359 = fsub float -0.000000e+00, %37 %360 = fadd float %359, %38 %361 = fmul float %39, %360 %362 = fadd float %361, %37 %363 = fmul float %362, %. %364 = call i32 @llvm.SI.packf16(float %347, float %349) %365 = bitcast i32 %364 to float %366 = call i32 @llvm.SI.packf16(float %351, float %363) %367 = bitcast i32 %366 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %365, float %367, float %365, float %367) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_MUL_F32_e32 v4, v3, v3 ; 10080703 V_MAD_F32 v4, v2, v2, v4, 0, 0 ; D2820004 04120502 V_INTERP_P1_F32 v5, v0, 2, 0, [m0] ; C8140200 V_INTERP_P2_F32 v5, [v5], v1, 2, 0, [m0] ; C8150201 V_MAD_F32 v4, v5, v5, v4, 0, 0 ; D2820004 04120B05 V_RSQ_CLAMP_F32_e32 v4, v4 ; 7E085904 V_MUL_F32_e32 v6, v2, v4 ; 100C0902 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 V_INTERP_P1_F32 v10, v0, 1, 1, [m0] ; C8280500 V_INTERP_P2_F32 v10, [v10], v1, 1, 1, [m0] ; C8290501 V_MUL_F32_e32 v11, v10, v10 ; 1016150A V_MAD_F32 v11, v2, v2, v11, 0, 0 ; D282000B 042E0502 V_INTERP_P1_F32 v12, v0, 2, 1, [m0] ; C8300600 V_INTERP_P2_F32 v12, [v12], v1, 2, 1, [m0] ; C8310601 V_MAD_F32 v11, v12, v12, v11, 0, 0 ; D282000B 042E190C V_RSQ_CLAMP_F32_e32 v11, v11 ; 7E16590B V_MUL_F32_e32 v2, v2, v11 ; 10041702 V_MUL_F32_e32 v13, v2, v6 ; 101A0D02 V_MUL_F32_e32 v7, v3, v4 ; 100E0903 V_MUL_F32_e32 v3, v10, v11 ; 1006170A V_MAD_F32 v10, v3, v7, v13, 0, 0 ; D282000A 04360F03 V_MUL_F32_e32 v8, v5, v4 ; 10100905 V_MUL_F32_e32 v4, v12, v11 ; 1008170C V_MAD_F32 v5, v4, v8, v10, 0, 0 ; D2820005 042A1104 V_ADD_F32_e32 v5, v5, v5 ; 060A0B05 V_MUL_F32_e32 v10, v8, v5 ; 10140B08 V_SUB_F32_e32 v12, v4, v10 ; 08181504 V_MUL_F32_e32 v4, v7, v5 ; 10080B07 V_SUB_F32_e32 v11, v3, v4 ; 08160903 V_MUL_F32_e32 v3, v6, v5 ; 10060B06 V_SUB_F32_e32 v10, v2, v3 ; 08140702 V_MOV_B32_e32 v13, v12 ; 7E1A030C V_CUBESC_F32 v15, v10, v11, v12, 0, 0 ; D28A000F 0432170A V_CUBETC_F32 v14, v10, v11, v12, 0, 0 ; D28C000E 0432170A V_CUBEMA_F32 v16, v10, v11, v12, 0, 0 ; D28E0010 0432170A V_CUBEID_F32 v17, v10, v11, v12, 0, 0 ; D2880011 0432170A V_MOV_B32_e32 v2, 0x7fffffff ; 7E0402FF 7FFFFFFF V_AND_B32_e32 v3, v16, v2 ; 36060510 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MOV_B32_e32 v4, 1.500000e+00 ; 7E0802FF 3FC00000 V_MAD_F32 v16, v14, v3, v4, 0, 0 ; D2820010 0412070E V_MAD_F32 v15, v15, v3, v4, 0, 0 ; D282000F 0412070F S_LOAD_DWORDX4 s[12:15], s[4:5], 0xc ; C086050C S_LOAD_DWORDX8 s[16:23], s[6:7], 0x18 ; C0C80718 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[16:23], s[12:15] ; F0800700 00640E0F S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v3, v15, v15 ; 10061F0F V_MOV_B32_e32 v13, 0.000000e+00 ; 7E1A0280 V_CUBESC_F32 v18, v10, v11, v12, 0, 0 ; D28A0012 0432170A V_CUBETC_F32 v17, v10, v11, v12, 0, 0 ; D28C0011 0432170A V_CUBEMA_F32 v19, v10, v11, v12, 0, 0 ; D28E0013 0432170A V_CUBEID_F32 v20, v10, v11, v12, 0, 0 ; D2880014 0432170A V_AND_B32_e32 v5, v19, v2 ; 360A0513 V_RCP_F32_e32 v5, v5 ; 7E0A5505 V_MAD_F32 v19, v17, v5, v4, 0, 0 ; D2820013 04120B11 V_MAD_F32 v18, v18, v5, v4, 0, 0 ; D2820012 04120B12 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x10 ; C0860510 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x20 ; C0C80720 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0800700 00641112 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v5, v18, v18 ; 100A2512 V_SUB_F32_e32 v3, v5, v3 ; 08060705 V_INTERP_P1_F32 v21, v0, 3, 1, [m0] ; C8540700 V_INTERP_P2_F32 v21, [v21], v1, 3, 1, [m0] ; C8550701 V_INTERP_P1_F32 v20, v0, 3, 0, [m0] ; C8500300 V_INTERP_P2_F32 v20, [v20], v1, 3, 0, [m0] ; C8510301 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x4 ; C0860504 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x8 ; C0C80708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800F00 00641614 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v5, v25, v25 ; 100A3319 V_MUL_F32_e32 v5, v5, v5 ; 100A0B05 V_MUL_F32_e32 v5, v5, v5 ; 100A0B05 V_ADD_F32_e64 v5, v5, 0, 1, 0 ; D2060805 00010105 V_MUL_F32_e32 v3, v5, v3 ; 10060705 V_MAD_F32 v3, v15, v15, v3, 0, 0 ; D2820003 040E1F0F V_MOV_B32_e32 v9, v13 ; 7E12030D V_CUBESC_F32 v27, v6, v7, v8, 0, 0 ; D28A001B 04220F06 V_CUBETC_F32 v26, v6, v7, v8, 0, 0 ; D28C001A 04220F06 V_CUBEMA_F32 v28, v6, v7, v8, 0, 0 ; D28E001C 04220F06 V_CUBEID_F32 v29, v6, v7, v8, 0, 0 ; D288001D 04220F06 V_AND_B32_e32 v2, v28, v2 ; 3604051C V_RCP_F32_e32 v2, v2 ; 7E045502 V_MAD_F32 v28, v26, v2, v4, 0, 0 ; D282001C 0412051A V_MAD_F32 v27, v27, v2, v4, 0, 0 ; D282001B 0412051B S_LOAD_DWORDX4 s[12:15], s[4:5], 0x8 ; C0860508 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x10 ; C0C80710 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[16:23], s[12:15] ; F0800700 0064061B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v2, v7, v7 ; 10040F07 S_LOAD_DWORDX4 s[12:15], s[4:5], 0x0 ; C0860500 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x0 ; C0C80700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800F00 00640914 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v12, v12 ; 1008190C S_LOAD_DWORDX4 s[12:15], s[2:3], 0x0 ; C0860300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[12:15], 0x11 ; C2000D11 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v13, s0 ; 7E1A0200 V_MUL_F32_e32 v20, s0, v13 ; 10281A00 V_SUB_F32_e32 v20, 1.000000e+00, v20 ; 082828F2 V_MUL_F32_e32 v20, v4, v20 ; 10282904 V_MAD_F32 v13, s0, v13, v20, 0, 0 ; D282000D 04521A00 V_MUL_F32_e32 v20, v10, v10 ; 1028150A V_MUL_F32_e32 v13, v20, v13 ; 101A1B14 V_MUL_F32_e32 v2, v13, v2 ; 1004050D V_MUL_F32_e32 v13, v23, v23 ; 101A2F17 V_MAD_F32 v2, v3, v13, v2, 0, 0 ; D2820002 040A1B03 V_MOV_B32_e32 v3, 1.000000e-07 ; 7E0602FF 33D6BF95 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v13, 0, -1, vcc, 0, 0, 0, 0 ; D200000D 01A98280 V_CNDMASK_B32_e64 v20, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000014 00018280 V_OR_B32_e32 v13, v13, v20 ; 381A290D V_MOV_B32_e32 v20, 0x33d6bf95 ; 7E2802FF 33D6BF95 V_CMP_NE_I32_e64 s[0:1], v13, 0, 0, 0 ; D10A0000 0001010D V_CNDMASK_B32_e64 v2, v20, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020514 V_RSQ_CLAMP_F32_e32 v2, v2 ; 7E045902 V_RCP_F32_e32 v2, v2 ; 7E045502 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x9 ; C2000D09 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v13, s0, v2 ; 081A0400 V_INTERP_P1_F32 v21, v0, 1, 3, [m0] ; C8540D00 V_INTERP_P2_F32 v21, [v21], v1, 1, 3, [m0] ; C8550D01 V_INTERP_P1_F32 v26, v0, 2, 3, [m0] ; C8680E00 V_INTERP_P2_F32 v26, [v26], v1, 2, 3, [m0] ; C8690E01 V_RCP_F32_e32 v26, v26 ; 7E34551A V_MUL_F32_e32 v21, v26, v21 ; 102A2B1A V_MAD_F32 v26, v21, 5.000000e-01, 5.000000e-01, 0, 0 ; D282001A 03C1E115 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x7 ; C2000D07 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v27, s0 ; 7E360200 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x14 ; C0800514 S_LOAD_DWORDX8 s[16:23], s[6:7], 0x28 ; C0C80728 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v21, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[0:3] ; F0800100 0004151A V_INTERP_P1_F32 v26, v0, 1, 2, [m0] ; C8680900 V_INTERP_P2_F32 v26, [v26], v1, 1, 2, [m0] ; C8690901 S_BUFFER_LOAD_DWORD s0, s[12:15], 0xd ; C2000D0D S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_SUBREV_F32_e32 v26, s0, v26 ; 0A343400 V_INTERP_P1_F32 v27, v0, 0, 2, [m0] ; C86C0800 V_INTERP_P2_F32 v27, [v27], v1, 0, 2, [m0] ; C86D0801 S_BUFFER_LOAD_DWORD s0, s[12:15], 0xc ; C2000D0C S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v27, s0, v27 ; 0A363600 V_MUL_F32_e32 v27, v27, v27 ; 1036371B V_MAD_F32 v26, v26, v26, v27, 0, 0 ; D282001A 046E351A V_INTERP_P1_F32 v27, v0, 2, 2, [m0] ; C86C0A00 V_INTERP_P2_F32 v27, [v27], v1, 2, 2, [m0] ; C86D0A01 S_BUFFER_LOAD_DWORD s0, s[12:15], 0xe ; C2000D0E S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v0, s0, v27 ; 0A003600 V_MAD_F32 v1, v0, v0, v26, 0, 0 ; D2820001 046A0100 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x5 ; C2000D05 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s0, v1 ; 10020200 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x4 ; C2000D04 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v26, s0, v0 ; 10340000 V_MUL_F32_e32 v26, 1.442700e+00, v26 ; 103434FF 3FB8AA65 V_EXP_F32_e32 v26, v26 ; 7E344B1A V_SUB_F32_e32 v26, 1.000000e+00, v26 ; 083434F2 V_MUL_F32_e32 v1, v26, v1 ; 1002031A V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v0, v0, v1 ; 10000300 V_MUL_F32_e32 v0, 1.442700e+00, v0 ; 100000FF 3FB8AA65 V_EXP_F32_e32 v0, v0 ; 7E004B00 V_ADD_F32_e64 v0, v0, 0, 1, 0 ; D2060800 00010100 V_SUB_F32_e32 v0, 1.000000e+00, v0 ; 080000F2 V_MUL_F32_e32 v0, v0, v21 ; 10002B00 V_MAD_F32 v1, v0, v13, v2, 0, 0 ; D2820001 040A1B00 V_MUL_F32_e32 v2, v14, v14 ; 10041D0E V_MUL_F32_e32 v13, v17, v17 ; 101A2311 V_SUB_F32_e32 v2, v13, v2 ; 0804050D V_MUL_F32_e32 v2, v5, v2 ; 10040505 V_MAD_F32 v2, v14, v14, v2, 0, 0 ; D2820002 040A1D0E V_MUL_F32_e32 v13, v6, v6 ; 101A0D06 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x10 ; C2000D10 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v21, s0 ; 7E2A0200 V_MUL_F32_e32 v26, s0, v21 ; 10342A00 V_SUB_F32_e32 v26, 1.000000e+00, v26 ; 083434F2 V_MUL_F32_e32 v26, v4, v26 ; 10343504 V_MAD_F32 v21, s0, v21, v26, 0, 0 ; D2820015 046A2A00 V_MUL_F32_e32 v26, v9, v9 ; 10341309 V_MUL_F32_e32 v21, v26, v21 ; 102A2B1A V_MUL_F32_e32 v13, v21, v13 ; 101A1B15 V_MUL_F32_e32 v21, v22, v22 ; 102A2D16 V_MAD_F32 v2, v2, v21, v13, 0, 0 ; D2820002 04362B02 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v13, 0, -1, vcc, 0, 0, 0, 0 ; D200000D 01A98280 V_CNDMASK_B32_e64 v21, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000015 00018280 V_OR_B32_e32 v13, v13, v21 ; 381A2B0D V_CMP_NE_I32_e64 s[0:1], v13, 0, 0, 0 ; D10A0000 0001010D V_CNDMASK_B32_e64 v2, v20, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020514 V_RSQ_CLAMP_F32_e32 v2, v2 ; 7E045902 V_RCP_F32_e32 v2, v2 ; 7E045502 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x8 ; C2000D08 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v13, s0, v2 ; 081A0400 V_MAD_F32 v2, v0, v13, v2, 0, 0 ; D2820002 040A1B00 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 V_MUL_F32_e32 v2, v16, v16 ; 10042110 V_MUL_F32_e32 v13, v19, v19 ; 101A2713 V_SUB_F32_e32 v2, v13, v2 ; 0804050D V_MUL_F32_e32 v2, v5, v2 ; 10040505 V_MAD_F32 v2, v16, v16, v2, 0, 0 ; D2820002 040A2110 V_MUL_F32_e32 v5, v8, v8 ; 100A1108 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x12 ; C2000D12 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s0 ; 7E0C0200 V_MUL_F32_e32 v7, s0, v6 ; 100E0C00 V_SUB_F32_e32 v7, 1.000000e+00, v7 ; 080E0EF2 V_MUL_F32_e32 v4, v4, v7 ; 10080F04 V_MAD_F32 v4, s0, v6, v4, 0, 0 ; D2820004 04120C00 V_MUL_F32_e32 v6, v11, v11 ; 100C170B V_MUL_F32_e32 v4, v6, v4 ; 10080906 V_MUL_F32_e32 v4, v4, v5 ; 10080B04 V_MUL_F32_e32 v5, v24, v24 ; 100A3118 V_MAD_F32 v2, v2, v5, v4, 0, 0 ; D2820002 04120B02 V_CMP_GE_F32_e32 vcc, v2, v3 ; 7C0C0702 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0 ; D0100000 00020502 V_CNDMASK_B32_e64 v3, 0, -1, vcc, 0, 0, 0, 0 ; D2000003 01A98280 V_CNDMASK_B32_e64 v4, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000004 00018280 V_OR_B32_e32 v3, v3, v4 ; 38060903 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_CNDMASK_B32_e64 v2, v20, v2, s[0:1], 0, 0, 0, 0 ; D2000002 00020514 V_RSQ_CLAMP_F32_e32 v3, v2 ; 7E065902 V_MUL_F32_e32 v3, v3, v2 ; 10060503 V_MOV_B32_e32 v4, 0x80000000 ; 7E0802FF 80000000 V_XOR_B32_e32 v2, v2, v4 ; 3A040902 V_CMP_GT_F32_e32 vcc, 0, v2 ; 7C080480 V_CNDMASK_B32_e64 v2, 0.000000e+00, v3, vcc, 0, 0, 0, 0 ; D2000002 01AA0680 S_BUFFER_LOAD_DWORD s0, s[12:15], 0xa ; C2000D0A S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUB_F32_e32 v3, s0, v2 ; 08060400 V_MAD_F32 v0, v0, v3, v2, 0, 0 ; D2820000 040A0700 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x0 ; C2000D00 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v2, s0, v27 ; 0A043600 V_CMP_GE_F32_e64 s[0:1], v2, 0.000000e+00, 0, 0 ; D00C0000 00010102 V_MOV_B32_e32 v2, 0x3ea8f5c3 ; 7E0402FF 3EA8F5C3 V_CNDMASK_B32_e64 v2, v2, 1.000000e+00, s[0:1], 0, 0, 0, 0 ; D2000002 0001E502 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x15 ; C2000D15 S_BUFFER_LOAD_DWORD s1, s[12:15], 0x14 ; C2008D14 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s1 ; 7E060201 V_SUB_F32_e32 v4, s0, v3 ; 08080600 S_BUFFER_LOAD_DWORD s0, s[12:15], 0x16 ; C2000D16 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s0, v4, v3, 0, 0 ; D2820003 040E0800 V_MUL_F32_e32 v2, v3, v2 ; 10040503 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, IN[0].yyyy, CONST[ADDR[0].x].xyww 7: MOV TEMP[0].xyz, TEMP[2].xyzx 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: MAD TEMP[2].xyz, IN[0].xxxx, CONST[ADDR[0].x].xyww, TEMP[0] 11: MOV TEMP[0].xyz, TEMP[2].xyzx 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: MAD TEMP[0].xyz, IN[0].zzzz, CONST[ADDR[0].x].xyww, TEMP[0] 16: F2I TEMP[1].x, TEMP[1].xxxx 17: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: UARL ADDR[0].x, TEMP[1].xxxx 19: ADD TEMP[0].xyw, TEMP[0].xyzz, CONST[ADDR[0].x] 20: MOV TEMP[0].xyw, TEMP[0].xyxw 21: MOV TEMP[0].z, IMM[0].yyyy 22: MOV OUT[0], TEMP[0] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0 %22 = add i32 %5, %7 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22) %24 = extractelement <4 x float> %23, i32 2 %25 = call float @llvm.AMDIL.fraction.(float %24) %26 = fsub float -0.000000e+00, %25 %27 = fadd float %26, %24 %28 = fmul float %27, 4.000000e+00 %29 = fptosi float %28 to i32 %30 = bitcast i32 %29 to float %31 = bitcast float %30 to i32 %32 = add i32 1, %31 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = shl i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = fmul float %18, %36 %38 = shl i32 %34, 4 %39 = add i32 %38, 4 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = fmul float %18, %40 %42 = shl i32 %34, 4 %43 = add i32 %42, 12 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = fmul float %18, %44 %46 = fptosi float %28 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = shl i32 %48, 4 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %17, %50 %52 = fadd float %51, %37 %53 = shl i32 %48, 4 %54 = add i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = fmul float %17, %55 %57 = fadd float %56, %41 %58 = shl i32 %48, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %45 %63 = fptosi float %28 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = add i32 2, %65 %67 = bitcast i32 %66 to float %68 = bitcast float %67 to i32 %69 = shl i32 %68, 4 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %19, %70 %72 = fadd float %71, %52 %73 = shl i32 %68, 4 %74 = add i32 %73, 4 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %19, %75 %77 = fadd float %76, %57 %78 = shl i32 %68, 4 %79 = add i32 %78, 12 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = fmul float %19, %80 %82 = fadd float %81, %62 %83 = fptosi float %28 to i32 %84 = bitcast i32 %83 to float %85 = bitcast float %84 to i32 %86 = add i32 3, %85 %87 = bitcast i32 %86 to float %88 = bitcast float %87 to i32 %89 = shl i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fadd float %72, %90 %92 = shl i32 %88, 4 %93 = add i32 %92, 4 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fadd float %77, %94 %96 = shl i32 %88, 4 %97 = add i32 %96, 12 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fadd float %82, %98 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %91, float %95, float 0.000000e+00, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_FRACT_F32_e32 v5, v3 ; 7E0A4103 V_SUB_F32_e32 v1, v5, v3 ; 08020705 V_MUL_F32_e32 v1, -4.000000e+00, v1 ; 100202F7 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v2, 4, v1 ; 34040284 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000302 V_ADD_I32_e32 v4, 1, v1 ; 4A080281 V_LSHLREV_B32_e32 v4, 4, v4 ; 34080884 BUFFER_LOAD_DWORD v5, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000504 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v7, v5 ; 10000B07 V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 V_ADD_I32_e32 v3, 2, v1 ; 4A060282 V_LSHLREV_B32_e32 v3, 4, v3 ; 34060684 BUFFER_LOAD_DWORD v5, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000503 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 V_ADD_I32_e32 v1, 3, v1 ; 4A020283 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 BUFFER_LOAD_DWORD v5, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000501 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v5 ; 06000B00 V_OR_B32_e32 v5, 12, v2 ; 380A048C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 V_OR_B32_e32 v10, 12, v4 ; 3814088C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MAD_F32 v5, v6, v5, v10, 0, 0 ; D2820005 042A0B06 V_OR_B32_e32 v10, 12, v3 ; 3814068C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v8, v10, v5, 0, 0 ; D2820005 04161508 V_OR_B32_e32 v10, 12, v1 ; 3814028C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v5, v5, v10 ; 060A1505 V_OR_B32_e32 v2, 4, v2 ; 38040484 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_OR_B32_e32 v4, 4, v4 ; 38080884 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MAD_F32 v2, v6, v2, v4, 0, 0 ; D2820002 04120506 V_OR_B32_e32 v3, 4, v3 ; 38060684 BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v8, v3, v2, 0, 0 ; D2820002 040A0708 V_OR_B32_e32 v1, 4, v1 ; 38020284 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, v2, v1 ; 06020302 V_MOV_B32_e32 v2, 0.000000e+00 ; 7E040280 EXP 15, 12, 0, 1, 0, v0, v1, v2, v5 ; F80008CF 05020100 S_ENDPGM ; BF810000 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: V_CVT_PKRTZ_F16_F32_e64 v0, 0.000000e+00, 0.000000e+00, 0, 0 ; D25E0000 00010080 EXP 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..196] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.5000, -0.5000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[2].x, TEMP[1].xxxx 18: UADD TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: ADD TEMP[2], TEMP[0], CONST[ADDR[0].x] 22: F2I TEMP[3].x, TEMP[1].xxxx 23: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: MUL TEMP[3].xyz, IN[0].yyyy, CONST[ADDR[0].x+96] 26: MOV TEMP[0].xyz, TEMP[3].xyzx 27: F2I TEMP[3].x, TEMP[1].xxxx 28: UARL ADDR[0].x, TEMP[3].xxxx 29: MAD TEMP[3].xyz, IN[0].xxxx, CONST[ADDR[0].x+96], TEMP[0] 30: MOV TEMP[0].xyz, TEMP[3].xyzx 31: F2I TEMP[3].x, TEMP[1].xxxx 32: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 33: UARL ADDR[0].x, TEMP[3].xxxx 34: MAD TEMP[3].xyz, IN[0].zzzz, CONST[ADDR[0].x+96], TEMP[0] 35: MOV TEMP[0].xyz, TEMP[3].xyzx 36: F2I TEMP[1].x, TEMP[1].xxxx 37: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 38: UARL ADDR[0].x, TEMP[1].xxxx 39: ADD TEMP[1].xyz, TEMP[0], CONST[ADDR[0].x+96] 40: MOV TEMP[0].xyz, TEMP[1].xyzx 41: ADD TEMP[3].xy, TEMP[0], CONST[196].zwzw 42: MOV TEMP[3].xy, TEMP[3].xyxx 43: MUL TEMP[3].xy, TEMP[3], CONST[196] 44: MOV TEMP[3].xy, TEMP[3].xyxx 45: MUL TEMP[4].yw, TEMP[1].yyyy, CONST[193].xxzy 46: MOV TEMP[0].yw, TEMP[4].wyww 47: MAD TEMP[4].xy, TEMP[1].xxxx, CONST[192], TEMP[0].ywzw 48: MOV TEMP[0].xy, TEMP[4].xyxx 49: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[194], TEMP[0] 50: MOV TEMP[0].xy, TEMP[1].xyxx 51: ADD TEMP[1].xy, TEMP[0], CONST[195] 52: MOV TEMP[0].xy, TEMP[1].xyxx 53: MAD TEMP[0].xy, TEMP[0], IMM[0].yzzw, IMM[0].zzzz 54: MOV TEMP[0].xy, TEMP[0].xyxx 55: MOV TEMP[3].zw, IMM[2].yyxy 56: MOV TEMP[0].zw, IMM[2].yyxy 57: MOV OUT[1], TEMP[3] 58: MOV OUT[2], TEMP[0] 59: MOV OUT[0], TEMP[2] 60: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3072) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3076) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3088) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3092) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3104) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3108) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3120) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3124) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3136) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3140) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3144) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3148) %25 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0 %27 = add i32 %5, %7 %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %27) %29 = extractelement <4 x float> %28, i32 0 %30 = extractelement <4 x float> %28, i32 1 %31 = extractelement <4 x float> %28, i32 2 %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 2 %37 = call float @llvm.AMDIL.fraction.(float %36) %38 = fsub float -0.000000e+00, %37 %39 = fadd float %38, %36 %40 = fmul float %39, 4.000000e+00 %41 = fptosi float %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = add i32 1, %43 %45 = bitcast i32 %44 to float %46 = bitcast float %45 to i32 %47 = shl i32 %46, 4 %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %47) %49 = fmul float %30, %48 %50 = shl i32 %46, 4 %51 = add i32 %50, 4 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = fmul float %30, %52 %54 = shl i32 %46, 4 %55 = add i32 %54, 8 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = fmul float %30, %56 %58 = shl i32 %46, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %30, %60 %62 = fptosi float %40 to i32 %63 = bitcast i32 %62 to float %64 = bitcast float %63 to i32 %65 = shl i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = fmul float %29, %66 %68 = fadd float %67, %49 %69 = shl i32 %64, 4 %70 = add i32 %69, 4 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %29, %71 %73 = fadd float %72, %53 %74 = shl i32 %64, 4 %75 = add i32 %74, 8 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %29, %76 %78 = fadd float %77, %57 %79 = shl i32 %64, 4 %80 = add i32 %79, 12 %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %80) %82 = fmul float %29, %81 %83 = fadd float %82, %61 %84 = fptosi float %40 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = add i32 2, %86 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = shl i32 %89, 4 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) %92 = fmul float %31, %91 %93 = fadd float %92, %68 %94 = shl i32 %89, 4 %95 = add i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %31, %96 %98 = fadd float %97, %73 %99 = shl i32 %89, 4 %100 = add i32 %99, 8 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = fmul float %31, %101 %103 = fadd float %102, %78 %104 = shl i32 %89, 4 %105 = add i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %31, %106 %108 = fadd float %107, %83 %109 = fptosi float %40 to i32 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = add i32 3, %111 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = shl i32 %114, 4 %116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %115) %117 = fadd float %93, %116 %118 = shl i32 %114, 4 %119 = add i32 %118, 4 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fadd float %98, %120 %122 = shl i32 %114, 4 %123 = add i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float %103, %124 %126 = shl i32 %114, 4 %127 = add i32 %126, 12 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float %108, %128 %130 = fptosi float %40 to i32 %131 = bitcast i32 %130 to float %132 = bitcast float %131 to i32 %133 = add i32 1, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = shl i32 %135, 4 %137 = add i32 %136, 1536 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = fmul float %30, %138 %140 = shl i32 %135, 4 %141 = add i32 %140, 1540 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = fmul float %30, %142 %144 = shl i32 %135, 4 %145 = add i32 %144, 1544 %146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %145) %147 = fmul float %30, %146 %148 = fptosi float %40 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = shl i32 %150, 4 %152 = add i32 %151, 1536 %153 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %152) %154 = fmul float %29, %153 %155 = fadd float %154, %139 %156 = shl i32 %150, 4 %157 = add i32 %156, 1540 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = fmul float %29, %158 %160 = fadd float %159, %143 %161 = shl i32 %150, 4 %162 = add i32 %161, 1544 %163 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %162) %164 = fmul float %29, %163 %165 = fadd float %164, %147 %166 = fptosi float %40 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = add i32 2, %168 %170 = bitcast i32 %169 to float %171 = bitcast float %170 to i32 %172 = shl i32 %171, 4 %173 = add i32 %172, 1536 %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %173) %175 = fmul float %31, %174 %176 = fadd float %175, %155 %177 = shl i32 %171, 4 %178 = add i32 %177, 1540 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = fmul float %31, %179 %181 = fadd float %180, %160 %182 = shl i32 %171, 4 %183 = add i32 %182, 1544 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fmul float %31, %184 %186 = fadd float %185, %165 %187 = fptosi float %40 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 3, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 1536 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = fadd float %176, %195 %197 = shl i32 %192, 4 %198 = add i32 %197, 1540 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fadd float %181, %199 %201 = shl i32 %192, 4 %202 = add i32 %201, 1544 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) %204 = fadd float %186, %203 %205 = fadd float %196, %23 %206 = fadd float %200, %24 %207 = fmul float %205, %21 %208 = fmul float %206, %22 %209 = fmul float %200, %15 %210 = fmul float %200, %16 %211 = fmul float %196, %13 %212 = fadd float %211, %209 %213 = fmul float %196, %14 %214 = fadd float %213, %210 %215 = fmul float %204, %17 %216 = fadd float %215, %212 %217 = fmul float %204, %18 %218 = fadd float %217, %214 %219 = fadd float %216, %19 %220 = fadd float %218, %20 %221 = fmul float %219, 5.000000e-01 %222 = fadd float %221, -5.000000e-01 %223 = fmul float %220, -5.000000e-01 %224 = fadd float %223, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %207, float %208, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %222, float %224, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %121, float %125, float %129) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_FRACT_F32_e32 v5, v3 ; 7E0A4103 V_SUB_F32_e32 v1, v5, v3 ; 08020705 V_MUL_F32_e32 v1, -4.000000e+00, v1 ; 100202F7 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v2, 4, v1 ; 34040284 V_ADD_I32_e32 v3, 0x604, v2 ; 4A0604FF 00000604 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 V_ADD_I32_e32 v4, 1, v1 ; 4A080281 V_LSHLREV_B32_e32 v4, 4, v4 ; 34080884 V_ADD_I32_e32 v5, 0x604, v4 ; 4A0A08FF 00000604 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v7, v5 ; 10000B07 V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 V_ADD_I32_e32 v3, 2, v1 ; 4A060282 V_LSHLREV_B32_e32 v3, 4, v3 ; 34060684 V_ADD_I32_e32 v5, 0x604, v3 ; 4A0A06FF 00000604 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 V_ADD_I32_e32 v1, 3, v1 ; 4A020283 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 V_ADD_I32_e32 v5, 0x604, v1 ; 4A0A02FF 00000604 BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v5 ; 06000B00 S_MOV_B32 s4, 0xc4c ; BE8403FF 00000C4C S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v0 ; 060A0004 S_MOV_B32 s4, 0xc44 ; BE8403FF 00000C44 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 V_ADD_I32_e32 v10, 0x600, v2 ; 4A1404FF 00000600 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A V_ADD_I32_e32 v11, 0x600, v4 ; 4A1608FF 00000600 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v11, v7, v11 ; 10161707 V_MAD_F32 v10, v6, v10, v11, 0, 0 ; D282000A 042E1506 V_ADD_I32_e32 v11, 0x600, v3 ; 4A1606FF 00000600 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v8, v11, v10, 0, 0 ; D282000A 042A1708 V_ADD_I32_e32 v11, 0x600, v1 ; 4A1602FF 00000600 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v10, v10, v11 ; 0614170A S_MOV_B32 s4, 0xc48 ; BE8403FF 00000C48 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v11, s4, v10 ; 06161404 S_MOV_B32 s4, 0xc40 ; BE8403FF 00000C40 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v11, s4, v11 ; 10161604 V_MOV_B32_e32 v12, 1.000000e+00 ; 7E1802F2 V_MOV_B32_e32 v13, 0.000000e+00 ; 7E1A0280 EXP 15, 32, 0, 0, 0, v11, v5, v13, v12 ; F800020F 0C0D050B S_MOV_B32 s4, 0xc14 ; BE8403FF 00000C14 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_MOV_B32 s4, 0xc04 ; BE8403FF 00000C04 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v10, s4, v5, 0, 0 ; D2820005 0414090A V_ADD_I32_e32 v11, 0x608, v2 ; 4A1604FF 00000608 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B V_ADD_I32_e32 v14, 0x608, v4 ; 4A1C08FF 00000608 BUFFER_LOAD_DWORD v14, s[0:3] + v14 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000E0E S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v14, v7, v14 ; 101C1D07 V_MAD_F32 v11, v6, v11, v14, 0, 0 ; D282000B 043A1706 V_ADD_I32_e32 v14, 0x608, v3 ; 4A1C06FF 00000608 BUFFER_LOAD_DWORD v14, s[0:3] + v14 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000E0E S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v11, v8, v14, v11, 0, 0 ; D282000B 042E1D08 V_ADD_I32_e32 v14, 0x608, v1 ; 4A1C02FF 00000608 BUFFER_LOAD_DWORD v14, s[0:3] + v14 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000E0E S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v11, v11, v14 ; 06161D0B S_MOV_B32 s4, 0xc24 ; BE8403FF 00000C24 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v11, s4, v5, 0, 0 ; D2820005 0414090B S_MOV_B32 s4, 0xc34 ; BE8403FF 00000C34 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MAD_F32 v5, v5, -5.000000e-01, -5.000000e-01, 0, 0 ; D2820005 03C5E305 S_MOV_B32 s4, 0xc10 ; BE8403FF 00000C10 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_MOV_B32 s4, 0xc00 ; BE8403FF 00000C00 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v10, s4, v0, 0, 0 ; D2820000 0400090A S_MOV_B32 s4, 0xc20 ; BE8403FF 00000C20 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v11, s4, v0, 0, 0 ; D2820000 0400090B S_MOV_B32 s4, 0xc30 ; BE8403FF 00000C30 S_BUFFER_LOAD_DWORD s4, s[0:3], s4 ; C2020004 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 V_MAD_F32 v0, v0, 5.000000e-01, -5.000000e-01, 0, 0 ; D2820000 03C5E100 EXP 15, 33, 0, 0, 0, v0, v5, v13, v12 ; F800021F 0C0D0500 S_WAITCNT expcnt(0) ; BF8C070F BUFFER_LOAD_DWORD v0, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000002 BUFFER_LOAD_DWORD v5, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000504 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v5, v7, v5 ; 100A0B07 V_MAD_F32 v0, v6, v0, v5, 0, 0 ; D2820000 04160106 BUFFER_LOAD_DWORD v5, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000503 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 BUFFER_LOAD_DWORD v5, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000501 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v0, v5 ; 06000B00 V_OR_B32_e32 v5, 12, v2 ; 380A048C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 V_OR_B32_e32 v10, 12, v4 ; 3814088C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MAD_F32 v5, v6, v5, v10, 0, 0 ; D2820005 042A0B06 V_OR_B32_e32 v10, 12, v3 ; 3814068C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v8, v10, v5, 0, 0 ; D2820005 04161508 V_OR_B32_e32 v10, 12, v1 ; 3814028C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v5, v5, v10 ; 060A1505 V_OR_B32_e32 v10, 8, v2 ; 38140488 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A V_OR_B32_e32 v11, 8, v4 ; 38160888 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v11, v7, v11 ; 10161707 V_MAD_F32 v10, v6, v10, v11, 0, 0 ; D282000A 042E1506 V_OR_B32_e32 v11, 8, v3 ; 38160688 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v8, v11, v10, 0, 0 ; D282000A 042A1708 V_OR_B32_e32 v11, 8, v1 ; 38160288 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v10, v10, v11 ; 0614170A V_OR_B32_e32 v2, 4, v2 ; 38040484 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_OR_B32_e32 v4, 4, v4 ; 38080884 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MAD_F32 v2, v6, v2, v4, 0, 0 ; D2820002 04120506 V_OR_B32_e32 v3, 4, v3 ; 38060684 BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v8, v3, v2, 0, 0 ; D2820002 040A0708 V_OR_B32_e32 v1, 4, v1 ; 38020284 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v1, v2, v1 ; 06020302 EXP 15, 12, 0, 1, 0, v0, v1, v10, v5 ; F80008CF 050A0100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0100} 0: MAD TEMP[0].xy, IN[1], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].y, TEMP[0], SAMP[1], 2D 3: ADD TEMP[1].x, TEMP[0].yyyy, IMM[0].wwww 4: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz 5: UIF TEMP[2].xxxx :0 6: MOV TEMP[2].x, IMM[0].zzzz 7: ELSE :0 8: MOV TEMP[2].x, IMM[0].yyyy 9: ENDIF 10: MOV TEMP[2].x, TEMP[2].xxxx 11: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 12: UIF TEMP[3].xxxx :0 13: MOV TEMP[3].x, IMM[0].zzzz 14: ELSE :0 15: MOV TEMP[3].x, IMM[0].yyyy 16: ENDIF 17: MOV TEMP[2].y, TEMP[3].xxxx 18: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 19: UIF TEMP[3].xxxx :0 20: MOV TEMP[3].x, IMM[0].zzzz 21: ELSE :0 22: MOV TEMP[3].x, IMM[0].yyyy 23: ENDIF 24: MOV TEMP[2].z, TEMP[3].xxxx 25: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 26: UIF TEMP[1].xxxx :0 27: ELSE :0 28: ENDIF 29: FSLT TEMP[1].xyz, TEMP[2].xyzz, IMM[0].zzzz 30: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 31: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 32: UIF TEMP[2].xxxx :0 33: KILL 34: ENDIF 35: MOV TEMP[1].xy, IN[0].xyyy 36: TEX TEMP[1], TEMP[1], SAMP[0], 2D 37: MUL TEMP[0].w, TEMP[0].yyyy, TEMP[1].wwww 38: MOV TEMP[0].w, TEMP[0].wwww 39: MOV TEMP[0].xyz, TEMP[1].xyzx 40: MOV OUT[0], TEMP[0] 41: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %34 = fmul float %32, 1.000000e+00 %35 = fadd float %34, 0.000000e+00 %36 = fmul float %33, -1.000000e+00 %37 = fadd float %36, 1.000000e+00 %38 = bitcast float %35 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 1 %46 = fadd float %45, 0xBF847AE140000000 %47 = fcmp oge float %46, 0.000000e+00 %48 = sext i1 %47 to i32 %49 = bitcast i32 %48 to float %50 = bitcast float %49 to i32 %51 = icmp ne i32 %50, 0 %. = select i1 %51, float 0.000000e+00, float -1.000000e+00 %52 = fcmp oge float %46, 0.000000e+00 %53 = sext i1 %52 to i32 %54 = bitcast i32 %53 to float %55 = bitcast float %54 to i32 %56 = icmp ne i32 %55, 0 %temp12.0 = select i1 %56, float 0.000000e+00, float -1.000000e+00 %57 = fcmp oge float %46, 0.000000e+00 %58 = sext i1 %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = icmp ne i32 %60, 0 %.28 = select i1 %61, float 0.000000e+00, float -1.000000e+00 %62 = fcmp oge float %46, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = icmp ne i32 %65, 0 %67 = fcmp olt float %., 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %temp12.0, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = fcmp olt float %.28, 0.000000e+00 %72 = sext i1 %71 to i32 %73 = bitcast i32 %68 to float %74 = bitcast i32 %70 to float %75 = bitcast i32 %72 to float %76 = bitcast float %73 to i32 %77 = bitcast float %75 to i32 %78 = or i32 %76, %77 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = bitcast float %74 to i32 %82 = or i32 %80, %81 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = icmp ne i32 %84, 0 br i1 %85, label %IF26, label %ENDIF25 IF26: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF25 ENDIF25: ; preds = %main_body, %IF26 %86 = bitcast float %30 to i32 %87 = bitcast float %31 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = bitcast <8 x i32> %23 to <32 x i8> %91 = bitcast <4 x i32> %25 to <16 x i8> %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = extractelement <4 x float> %92, i32 3 %97 = fmul float %45, %96 %98 = call i32 @llvm.SI.packf16(float %93, float %94) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %95, float %97) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 V_INTERP_P1_F32 v4, v0, 1, 1, [m0] ; C8100500 V_INTERP_P2_F32 v4, [v4], v1, 1, 1, [m0] ; C8110501 V_SUB_F32_e32 v3, 1.000000e+00, v4 ; 080608F2 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x4 ; C0800504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v2, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800200 00030202 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v3, -1.000000e-02, v2 ; 060604FF BC23D70A V_CMP_GE_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D00C0000 00010103 V_CNDMASK_B32_e64 v3, -1.000000e+00, 0, s[0:1], 0, 0, 0, 0 ; D2000003 000100F3 V_CMP_LT_F32_e64 s[0:1], v3, 0.000000e+00, 0, 0 ; D0020000 00010103 V_CNDMASK_B32_e64 v3, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000003 00018280 V_OR_B32_e32 v3, v3, v3 ; 38060703 V_CMP_NE_I32_e64 s[0:1], v3, 0, 0, 0 ; D10A0000 00010103 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v3, v0, 0, 0, [m0] ; C80C0000 V_INTERP_P2_F32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[0:1] ; 88FE007E S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[4:11], s[6:7], 0x0 ; C0C20700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800F00 00010303 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v0, v3, v4 ; 5E000903 V_MUL_F32_e32 v1, v2, v6 ; 10020D02 V_CVT_PKRTZ_F16_F32_e32 v1, v5, v1 ; 5E020305 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.1592, 0.5000, 6.2832, -3.1416} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, IN[4].zzzz, IN[4].yyyy 1: MAD TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx, IMM[0].yyyy 2: FRC TEMP[1].x, TEMP[1].xxxx 3: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 4: COS TEMP[2].x, TEMP[1].xxxx 5: SIN TEMP[1].x, TEMP[1].xxxx 6: MUL TEMP[0], IN[6].xyzy, IN[6].xxxy 7: LRP TEMP[3].xy, TEMP[0].xwzw, IMM[1].xxxx, TEMP[2].xxxx 8: MOV TEMP[4].x, TEMP[3].xyxx 9: ADD TEMP[5].x, -TEMP[2].xxxx, IMM[1].xxxx 10: MUL TEMP[6].yzw, TEMP[1].xxxx, IN[6].xyzx 11: MAD TEMP[7].xy, TEMP[0].zyzw, TEMP[5].xxxx, -TEMP[6].yzzw 12: MOV TEMP[8].x, TEMP[7].xyxx 13: MAD TEMP[9].xy, TEMP[0].yzzw, TEMP[5].xxxx, TEMP[6].zyzw 14: MOV TEMP[10].x, TEMP[9].xyxx 15: MOV TEMP[4].z, TEMP[7].yyyy 16: MOV TEMP[4].w, TEMP[9].yyyy 17: MOV TEMP[10].w, TEMP[3].yyyy 18: DP3 TEMP[3].x, IN[5].xyzz, TEMP[4].xzww 19: MUL TEMP[3].x, TEMP[3].xxxx, IN[4].xxxx 20: MOV TEMP[4].x, TEMP[3].xxxx 21: MUL TEMP[3].yz, IN[6].zzzz, IN[6] 22: LRP TEMP[2].w, TEMP[3].zzzz, IMM[1].xxxx, TEMP[2].xxxx 23: MOV TEMP[8].w, TEMP[2].wwww 24: MAD TEMP[2].z, TEMP[3].yyyy, TEMP[5].xxxx, TEMP[6].wwww 25: MOV TEMP[8].z, TEMP[2].zzzz 26: MAD TEMP[2].z, TEMP[3].yyyy, TEMP[5].xxxx, -TEMP[6].wwww 27: MOV TEMP[10].z, TEMP[2].zzzz 28: DP3 TEMP[2].x, IN[5].xzyy, TEMP[10].xzww 29: MUL TEMP[2].y, TEMP[2].xxxx, IN[4].xxxx 30: MOV TEMP[4].y, TEMP[2].yyyy 31: DP3 TEMP[2].x, IN[5].xyzz, TEMP[8].xzww 32: MUL TEMP[2].z, TEMP[2].xxxx, IN[4].xxxx 33: MOV TEMP[4].z, TEMP[2].zzzz 34: ADD TEMP[2].xyz, TEMP[4], IN[0] 35: MUL TEMP[4], CONST[1].yyyy, CONST[5] 36: MAD TEMP[4], CONST[1].xxxx, CONST[4], TEMP[4] 37: MAD TEMP[4], CONST[1].zzzz, CONST[6], TEMP[4] 38: MAD TEMP[1], CONST[1].wwww, CONST[7], TEMP[4] 39: MUL TEMP[1], TEMP[2].yyyy, TEMP[1] 40: MUL TEMP[8], CONST[0].yyyy, CONST[5] 41: MAD TEMP[8], CONST[0].xxxx, CONST[4], TEMP[8] 42: MAD TEMP[8], CONST[0].zzzz, CONST[6], TEMP[8] 43: MAD TEMP[4], CONST[0].wwww, CONST[7], TEMP[8] 44: MAD TEMP[1], TEMP[2].xxxx, TEMP[4], TEMP[1] 45: MUL TEMP[8], CONST[2].yyyy, CONST[5] 46: MAD TEMP[8], CONST[2].xxxx, CONST[4], TEMP[8] 47: MAD TEMP[8], CONST[2].zzzz, CONST[6], TEMP[8] 48: MAD TEMP[4], CONST[2].wwww, CONST[7], TEMP[8] 49: MAD TEMP[0], TEMP[2].zzzz, TEMP[4], TEMP[1] 50: MUL TEMP[4], CONST[3].yyyy, CONST[5] 51: MAD TEMP[4], CONST[3].xxxx, CONST[4], TEMP[4] 52: MAD TEMP[4], CONST[3].zzzz, CONST[6], TEMP[4] 53: MAD TEMP[1], CONST[3].wwww, CONST[7], TEMP[4] 54: ADD TEMP[0], TEMP[0], TEMP[1] 55: MUL TEMP[1], IN[1], IN[2] 56: MOV TEMP[2].xy, IN[3].xyxx 57: MOV TEMP[2].zw, IMM[1].xxyx 58: MOV OUT[1], TEMP[1] 59: MOV OUT[2], TEMP[2] 60: MOV OUT[0], TEMP[0] 61: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %82 = load <16 x i8> addrspace(2)* %81, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 6 %89 = load <16 x i8> addrspace(2)* %88, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = fadd float %80, %79 %96 = fmul float %95, 0x3FC45F30E0000000 %97 = fadd float %96, 5.000000e-01 %98 = call float @llvm.AMDIL.fraction.(float %97) %99 = fmul float %98, 0x401921FC80000000 %100 = fadd float %99, 0xC00921FA00000000 %101 = call float @llvm.cos.f32(float %100) %102 = call float @llvm.sin.f32(float %100) %103 = fmul float %92, %92 %104 = fmul float %93, %92 %105 = fmul float %94, %92 %106 = fmul float %93, %93 %107 = call float @llvm.AMDGPU.lrp(float %103, float 1.000000e+00, float %101) %108 = call float @llvm.AMDGPU.lrp(float %106, float 1.000000e+00, float %101) %109 = fsub float -0.000000e+00, %101 %110 = fadd float %109, 1.000000e+00 %111 = fmul float %102, %93 %112 = fmul float %102, %94 %113 = fmul float %102, %92 %114 = fsub float -0.000000e+00, %111 %115 = fmul float %105, %110 %116 = fadd float %115, %114 %117 = fsub float -0.000000e+00, %112 %118 = fmul float %104, %110 %119 = fadd float %118, %117 %120 = fmul float %104, %110 %121 = fadd float %120, %112 %122 = fmul float %105, %110 %123 = fadd float %122, %111 %124 = fmul float %85, %107 %125 = fmul float %86, %119 %126 = fadd float %125, %124 %127 = fmul float %87, %123 %128 = fadd float %126, %127 %129 = fmul float %128, %78 %130 = fmul float %94, %93 %131 = fmul float %94, %94 %132 = call float @llvm.AMDGPU.lrp(float %131, float 1.000000e+00, float %101) %133 = fmul float %130, %110 %134 = fadd float %133, %113 %135 = fsub float -0.000000e+00, %113 %136 = fmul float %130, %110 %137 = fadd float %136, %135 %138 = fmul float %85, %121 %139 = fmul float %87, %137 %140 = fadd float %139, %138 %141 = fmul float %86, %108 %142 = fadd float %140, %141 %143 = fmul float %142, %78 %144 = fmul float %85, %116 %145 = fmul float %86, %134 %146 = fadd float %145, %144 %147 = fmul float %87, %132 %148 = fadd float %146, %147 %149 = fmul float %148, %78 %150 = fadd float %129, %49 %151 = fadd float %143, %50 %152 = fadd float %149, %51 %153 = fmul float %18, %33 %154 = fmul float %18, %34 %155 = fmul float %18, %35 %156 = fmul float %18, %36 %157 = fmul float %17, %29 %158 = fadd float %157, %153 %159 = fmul float %17, %30 %160 = fadd float %159, %154 %161 = fmul float %17, %31 %162 = fadd float %161, %155 %163 = fmul float %17, %32 %164 = fadd float %163, %156 %165 = fmul float %19, %37 %166 = fadd float %165, %158 %167 = fmul float %19, %38 %168 = fadd float %167, %160 %169 = fmul float %19, %39 %170 = fadd float %169, %162 %171 = fmul float %19, %40 %172 = fadd float %171, %164 %173 = fmul float %20, %41 %174 = fadd float %173, %166 %175 = fmul float %20, %42 %176 = fadd float %175, %168 %177 = fmul float %20, %43 %178 = fadd float %177, %170 %179 = fmul float %20, %44 %180 = fadd float %179, %172 %181 = fmul float %151, %174 %182 = fmul float %151, %176 %183 = fmul float %151, %178 %184 = fmul float %151, %180 %185 = fmul float %14, %33 %186 = fmul float %14, %34 %187 = fmul float %14, %35 %188 = fmul float %14, %36 %189 = fmul float %13, %29 %190 = fadd float %189, %185 %191 = fmul float %13, %30 %192 = fadd float %191, %186 %193 = fmul float %13, %31 %194 = fadd float %193, %187 %195 = fmul float %13, %32 %196 = fadd float %195, %188 %197 = fmul float %15, %37 %198 = fadd float %197, %190 %199 = fmul float %15, %38 %200 = fadd float %199, %192 %201 = fmul float %15, %39 %202 = fadd float %201, %194 %203 = fmul float %15, %40 %204 = fadd float %203, %196 %205 = fmul float %16, %41 %206 = fadd float %205, %198 %207 = fmul float %16, %42 %208 = fadd float %207, %200 %209 = fmul float %16, %43 %210 = fadd float %209, %202 %211 = fmul float %16, %44 %212 = fadd float %211, %204 %213 = fmul float %150, %206 %214 = fadd float %213, %181 %215 = fmul float %150, %208 %216 = fadd float %215, %182 %217 = fmul float %150, %210 %218 = fadd float %217, %183 %219 = fmul float %150, %212 %220 = fadd float %219, %184 %221 = fmul float %22, %33 %222 = fmul float %22, %34 %223 = fmul float %22, %35 %224 = fmul float %22, %36 %225 = fmul float %21, %29 %226 = fadd float %225, %221 %227 = fmul float %21, %30 %228 = fadd float %227, %222 %229 = fmul float %21, %31 %230 = fadd float %229, %223 %231 = fmul float %21, %32 %232 = fadd float %231, %224 %233 = fmul float %23, %37 %234 = fadd float %233, %226 %235 = fmul float %23, %38 %236 = fadd float %235, %228 %237 = fmul float %23, %39 %238 = fadd float %237, %230 %239 = fmul float %23, %40 %240 = fadd float %239, %232 %241 = fmul float %24, %41 %242 = fadd float %241, %234 %243 = fmul float %24, %42 %244 = fadd float %243, %236 %245 = fmul float %24, %43 %246 = fadd float %245, %238 %247 = fmul float %24, %44 %248 = fadd float %247, %240 %249 = fmul float %152, %242 %250 = fadd float %249, %214 %251 = fmul float %152, %244 %252 = fadd float %251, %216 %253 = fmul float %152, %246 %254 = fadd float %253, %218 %255 = fmul float %152, %248 %256 = fadd float %255, %220 %257 = fmul float %26, %33 %258 = fmul float %26, %34 %259 = fmul float %26, %35 %260 = fmul float %26, %36 %261 = fmul float %25, %29 %262 = fadd float %261, %257 %263 = fmul float %25, %30 %264 = fadd float %263, %258 %265 = fmul float %25, %31 %266 = fadd float %265, %259 %267 = fmul float %25, %32 %268 = fadd float %267, %260 %269 = fmul float %27, %37 %270 = fadd float %269, %262 %271 = fmul float %27, %38 %272 = fadd float %271, %264 %273 = fmul float %27, %39 %274 = fadd float %273, %266 %275 = fmul float %27, %40 %276 = fadd float %275, %268 %277 = fmul float %28, %41 %278 = fadd float %277, %270 %279 = fmul float %28, %42 %280 = fadd float %279, %272 %281 = fmul float %28, %43 %282 = fadd float %281, %274 %283 = fmul float %28, %44 %284 = fadd float %283, %276 %285 = fadd float %250, %278 %286 = fadd float %252, %280 %287 = fadd float %254, %282 %288 = fadd float %256, %284 %289 = fmul float %56, %64 %290 = fmul float %57, %65 %291 = fmul float %58, %66 %292 = fmul float %59, %67 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %289, float %290, float %291, float %292) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %72, float %73, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %285, float %286, float %287, float %288) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.cos.f32(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.sin.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readnone readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[5:8], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010500 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v9, v8, v4 ; 10120908 V_MUL_F32_e32 v10, v7, v3 ; 10140707 V_MUL_F32_e32 v11, v6, v2 ; 10160506 V_MUL_F32_e32 v1, v5, v1 ; 10020305 EXP 15, 32, 0, 0, 0, v1, v11, v10, v9 ; F800020F 090A0B01 S_LOAD_DWORDX4 s[4:7], s[8:9], 0xc ; C082090C S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x18 ; C0820918 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v5, v3, v1 ; 100A0303 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x10 ; C0820910 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v10, v8, v7 ; 06140F08 V_MOV_B32_e32 v11, 1.591550e-01 ; 7E1602FF 3E22F987 V_MAD_F32 v10, v10, v11, 5.000000e-01, 0, 0 ; D282000A 03C2170A V_FRACT_F32_e32 v10, v10 ; 7E14410A V_MOV_B32_e32 v11, -3.141590e+00 ; 7E1602FF C0490FD0 V_MOV_B32_e32 v12, 6.283190e+00 ; 7E1802FF 40C90FE4 V_MAD_F32 v10, v10, v12, v11, 0, 0 ; D282000A 042E190A V_MUL_F32_e32 v10, 1.591549e-01, v10 ; 101414FF 3E22F983 V_FRACT_F32_e32 v10, v10 ; 7E14410A V_SIN_F32_e32 v11, v10 ; 7E166B0A V_MUL_F32_e32 v12, v11, v2 ; 1018050B V_COS_F32_e32 v10, v10 ; 7E146D0A V_SUB_F32_e32 v13, 1.000000e+00, v10 ; 081A14F2 V_MAD_F32 v14, v5, v13, v12, 0, 0 ; D282000E 04321B05 V_MUL_F32_e32 v15, v2, v1 ; 101E0302 V_MUL_F32_e32 v16, v15, v13 ; 10201B0F V_MUL_F32_e32 v17, v11, v3 ; 1022070B V_SUB_F32_e32 v16, v17, v16 ; 08202111 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x14 ; C0820914 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[18:21], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011200 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v16, v19, v16 ; 10202113 V_MUL_F32_e32 v22, v1, v1 ; 102C0301 V_SUB_F32_e32 v22, 1.000000e+00, v22 ; 082C2CF2 V_MUL_F32_e32 v22, v22, v10 ; 102C1516 V_MAD_F32 v22, v1, v1, v22, 0, 0 ; D2820016 045A0301 V_MUL_F32_e32 v22, v18, v22 ; 102C2D12 V_SUB_F32_e32 v16, v22, v16 ; 08202116 V_MAD_F32 v14, v20, v14, v16, 0, 0 ; D282000E 04421D14 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[22:25], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80011600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v14, v6, v22, 0, 0 ; D2820000 045A0D0E V_MAD_F32 v14, v15, v13, v17, 0, 0 ; D282000E 04461B0F V_MUL_F32_e32 v14, v18, v14 ; 101C1D12 V_MUL_F32_e32 v15, v3, v2 ; 101E0503 V_MUL_F32_e32 v16, v15, v13 ; 10201B0F V_MUL_F32_e32 v11, v11, v1 ; 1016030B V_SUB_F32_e32 v16, v11, v16 ; 0820210B V_MUL_F32_e32 v16, v20, v16 ; 10202114 V_SUB_F32_e32 v14, v14, v16 ; 081C210E V_MUL_F32_e32 v16, v2, v2 ; 10200502 V_SUB_F32_e32 v16, 1.000000e+00, v16 ; 082020F2 V_MUL_F32_e32 v16, v16, v10 ; 10201510 V_MAD_F32 v16, v2, v2, v16, 0, 0 ; D2820010 04420502 V_MAD_F32 v14, v19, v16, v14, 0, 0 ; D282000E 043A2113 V_MAD_F32 v14, v14, v6, v23, 0, 0 ; D282000E 045E0D0E S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x17 ; C2028117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v16, s5 ; 7E200205 V_MUL_F32_e32 v17, s4, v16 ; 10222004 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x4 ; C2028104 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x13 ; C2030113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v26, s6 ; 7E340206 V_MAD_F32 v17, s5, v26, v17, 0, 0 ; D2820011 04463405 S_BUFFER_LOAD_DWORD s6, s[0:3], 0x6 ; C2030106 S_BUFFER_LOAD_DWORD s7, s[0:3], 0x1b ; C203811B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v27, s7 ; 7E360207 V_MAD_F32 v17, s6, v27, v17, 0, 0 ; D2820011 04463606 S_BUFFER_LOAD_DWORD s7, s[0:3], 0x7 ; C2038107 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1f ; C204011F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v28, s8 ; 7E380208 V_MAD_F32 v17, s7, v28, v17, 0, 0 ; D2820011 04463807 V_MUL_F32_e32 v17, v14, v17 ; 1022230E S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1 ; C2040101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v29, s8, v16 ; 103A2008 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x0 ; C2048100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v29, s9, v26, v29, 0, 0 ; D282001D 04763409 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x2 ; C2050102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v29, s10, v27, v29, 0, 0 ; D282001D 0476360A S_BUFFER_LOAD_DWORD s11, s[0:3], 0x3 ; C2058103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v29, s11, v28, v29, 0, 0 ; D282001D 0476380B V_MAD_F32 v17, v0, v29, v17, 0, 0 ; D2820011 04463B00 V_MAD_F32 v11, v15, v13, v11, 0, 0 ; D282000B 042E1B0F V_MUL_F32_e32 v11, v19, v11 ; 10161713 V_MUL_F32_e32 v5, v5, v13 ; 100A1B05 V_SUB_F32_e32 v5, v12, v5 ; 080A0B0C V_MUL_F32_e32 v5, v18, v5 ; 100A0B12 V_SUB_F32_e32 v5, v11, v5 ; 080A0B0B V_MUL_F32_e32 v11, v3, v3 ; 10160703 V_SUB_F32_e32 v11, 1.000000e+00, v11 ; 081616F2 V_MUL_F32_e32 v10, v11, v10 ; 1014150B V_MAD_F32 v1, v3, v3, v10, 0, 0 ; D2820001 042A0703 V_MAD_F32 v1, v20, v1, v5, 0, 0 ; D2820001 04160314 V_MAD_F32 v1, v1, v6, v24, 0, 0 ; D2820001 04620D01 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x9 ; C2060109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s12, v16 ; 1004200C S_BUFFER_LOAD_DWORD s13, s[0:3], 0x8 ; C2068108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s13, v26, v2, 0, 0 ; D2820002 040A340D S_BUFFER_LOAD_DWORD s14, s[0:3], 0xa ; C207010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s14, v27, v2, 0, 0 ; D2820002 040A360E S_BUFFER_LOAD_DWORD s15, s[0:3], 0xb ; C207810B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s15, v28, v2, 0, 0 ; D2820002 040A380F V_MAD_F32 v2, v1, v2, v17, 0, 0 ; D2820002 04460501 S_BUFFER_LOAD_DWORD s16, s[0:3], 0xd ; C208010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s16, v16 ; 10062010 S_BUFFER_LOAD_DWORD s17, s[0:3], 0xc ; C208810C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s17, v26, v3, 0, 0 ; D2820003 040E3411 S_BUFFER_LOAD_DWORD s18, s[0:3], 0xe ; C209010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s18, v27, v3, 0, 0 ; D2820003 040E3612 S_BUFFER_LOAD_DWORD s19, s[0:3], 0xf ; C209810F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s19, v28, v3, 0, 0 ; D2820003 040E3813 V_ADD_F32_e32 v2, v2, v3 ; 06040702 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x16 ; C20A0116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s20 ; 7E060214 V_MUL_F32_e32 v4, s4, v3 ; 10080604 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x12 ; C20A0112 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s20 ; 7E0A0214 V_MAD_F32 v4, s5, v5, v4, 0, 0 ; D2820004 04120A05 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x1a ; C20A011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s20 ; 7E0C0214 V_MAD_F32 v4, s6, v6, v4, 0, 0 ; D2820004 04120C06 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x1e ; C20A011E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s20 ; 7E0E0214 V_MAD_F32 v4, s7, v7, v4, 0, 0 ; D2820004 04120E07 V_MUL_F32_e32 v4, v14, v4 ; 1008090E V_MUL_F32_e32 v8, s8, v3 ; 10100608 V_MAD_F32 v8, s9, v5, v8, 0, 0 ; D2820008 04220A09 V_MAD_F32 v8, s10, v6, v8, 0, 0 ; D2820008 04220C0A V_MAD_F32 v8, s11, v7, v8, 0, 0 ; D2820008 04220E0B V_MAD_F32 v4, v0, v8, v4, 0, 0 ; D2820004 04121100 V_MUL_F32_e32 v8, s12, v3 ; 1010060C V_MAD_F32 v8, s13, v5, v8, 0, 0 ; D2820008 04220A0D V_MAD_F32 v8, s14, v6, v8, 0, 0 ; D2820008 04220C0E V_MAD_F32 v8, s15, v7, v8, 0, 0 ; D2820008 04220E0F V_MAD_F32 v4, v1, v8, v4, 0, 0 ; D2820004 04121101 V_MUL_F32_e32 v3, s16, v3 ; 10060610 V_MAD_F32 v3, s17, v5, v3, 0, 0 ; D2820003 040E0A11 V_MAD_F32 v3, s18, v6, v3, 0, 0 ; D2820003 040E0C12 V_MAD_F32 v3, s19, v7, v3, 0, 0 ; D2820003 040E0E13 V_ADD_F32_e32 v3, v4, v3 ; 06060704 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x15 ; C20A0115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s20 ; 7E080214 V_MUL_F32_e32 v5, s4, v4 ; 100A0804 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x11 ; C20A0111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s20 ; 7E0C0214 V_MAD_F32 v5, s5, v6, v5, 0, 0 ; D2820005 04160C05 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x19 ; C20A0119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s20 ; 7E0E0214 V_MAD_F32 v5, s6, v7, v5, 0, 0 ; D2820005 04160E06 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x1d ; C20A011D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s20 ; 7E100214 V_MAD_F32 v5, s7, v8, v5, 0, 0 ; D2820005 04161007 V_MUL_F32_e32 v5, v14, v5 ; 100A0B0E V_MUL_F32_e32 v9, s8, v4 ; 10120808 V_MAD_F32 v9, s9, v6, v9, 0, 0 ; D2820009 04260C09 V_MAD_F32 v9, s10, v7, v9, 0, 0 ; D2820009 04260E0A V_MAD_F32 v9, s11, v8, v9, 0, 0 ; D2820009 0426100B V_MAD_F32 v5, v0, v9, v5, 0, 0 ; D2820005 04161300 V_MUL_F32_e32 v9, s12, v4 ; 1012080C V_MAD_F32 v9, s13, v6, v9, 0, 0 ; D2820009 04260C0D V_MAD_F32 v9, s14, v7, v9, 0, 0 ; D2820009 04260E0E V_MAD_F32 v9, s15, v8, v9, 0, 0 ; D2820009 0426100F V_MAD_F32 v5, v1, v9, v5, 0, 0 ; D2820005 04161301 V_MUL_F32_e32 v4, s16, v4 ; 10080810 V_MAD_F32 v4, s17, v6, v4, 0, 0 ; D2820004 04120C11 V_MAD_F32 v4, s18, v7, v4, 0, 0 ; D2820004 04120E12 V_MAD_F32 v4, s19, v8, v4, 0, 0 ; D2820004 04121013 V_ADD_F32_e32 v4, v5, v4 ; 06080905 S_BUFFER_LOAD_DWORD s20, s[0:3], 0x14 ; C20A0114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s20 ; 7E0A0214 V_MUL_F32_e32 v6, s4, v5 ; 100C0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, s5, v7, v6, 0, 0 ; D2820006 041A0E05 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v6, s6, v8, v6, 0, 0 ; D2820006 041A1006 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x1c ; C200011C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v9, s0 ; 7E120200 V_MAD_F32 v6, s7, v9, v6, 0, 0 ; D2820006 041A1207 V_MUL_F32_e32 v6, v14, v6 ; 100C0D0E V_MUL_F32_e32 v10, s8, v5 ; 10140A08 V_MAD_F32 v10, s9, v7, v10, 0, 0 ; D282000A 042A0E09 V_MAD_F32 v10, s10, v8, v10, 0, 0 ; D282000A 042A100A V_MAD_F32 v10, s11, v9, v10, 0, 0 ; D282000A 042A120B V_MAD_F32 v0, v0, v10, v6, 0, 0 ; D2820000 041A1500 V_MUL_F32_e32 v6, s12, v5 ; 100C0A0C V_MAD_F32 v6, s13, v7, v6, 0, 0 ; D2820006 041A0E0D V_MAD_F32 v6, s14, v8, v6, 0, 0 ; D2820006 041A100E V_MAD_F32 v6, s15, v9, v6, 0, 0 ; D2820006 041A120F V_MAD_F32 v0, v1, v6, v0, 0, 0 ; D2820000 04020D01 V_MUL_F32_e32 v1, s16, v5 ; 10020A10 V_MAD_F32 v1, s17, v7, v1, 0, 0 ; D2820001 04060E11 V_MAD_F32 v1, s18, v8, v1, 0, 0 ; D2820001 04061012 V_MAD_F32 v1, s19, v9, v1, 0, 0 ; D2820001 04061213 V_ADD_F32_e32 v0, v0, v1 ; 06000300 EXP 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], TEMP[0], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %39, %26 %44 = fmul float %40, %27 %45 = fmul float %41, %28 %46 = fmul float %42, %29 %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float %49 = call i32 @llvm.SI.packf16(float %45, float %46) %50 = bitcast i32 %49 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 1, [m0] ; C80C0500 V_INTERP_P2_F32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 V_INTERP_P1_F32 v2, v0, 0, 1, [m0] ; C8080400 V_INTERP_P2_F32 v2, [v2], v1, 0, 1, [m0] ; C8090401 S_LOAD_DWORDX4 s[0:3], s[4:5], 0x0 ; C0800500 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x0 ; C0C60700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030202 V_INTERP_P1_F32 v6, v0, 3, 0, [m0] ; C8180300 V_INTERP_P2_F32 v6, [v6], v1, 3, 0, [m0] ; C8190301 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v6, v5, v6 ; 100C0D05 V_INTERP_P1_F32 v7, v0, 2, 0, [m0] ; C81C0200 V_INTERP_P2_F32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 V_MUL_F32_e32 v7, v4, v7 ; 100E0F04 V_CVT_PKRTZ_F16_F32_e32 v6, v7, v6 ; 5E0C0D07 V_INTERP_P1_F32 v7, v0, 1, 0, [m0] ; C81C0100 V_INTERP_P2_F32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 V_MUL_F32_e32 v7, v3, v7 ; 100E0F03 V_INTERP_P1_F32 v8, v0, 0, 0, [m0] ; C8200000 V_INTERP_P2_F32 v8, [v8], v1, 0, 0, [m0] ; C8210001 V_MUL_F32_e32 v0, v2, v8 ; 10001102 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v7 ; 5E000F00 EXP 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v4 ; 100A0804 EXP 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 1.000000e+00 ; 7E0A02F2 V_MOV_B32_e32 v6, 0.000000e+00 ; 7E0C0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x12 ; C2020112 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_ADD_F32_e32 v4, s4, v0 ; 06080004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x13 ; C2020113 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v1 ; 06000204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x19 ; C2028119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xe ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xd ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0xc ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: DP2 TEMP[5].x, TEMP[3].wxxx, IMM[1].xyyy 24: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 25: FRC TEMP[6].w, TEMP[5].xxxx 26: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].xxxx 27: MOV TEMP[0].z, TEMP[5].zzzz 28: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 29: FRC TEMP[5].z, TEMP[3].wwww 30: MOV TEMP[1].z, TEMP[5].zzzz 31: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 32: MOV TEMP[0].w, TEMP[3].wwww 33: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 34: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 35: UIF TEMP[4].xxxx :0 36: MOV TEMP[4].x, TEMP[3].wwww 37: ELSE :0 38: MOV TEMP[4].x, TEMP[3].zzzz 39: ENDIF 40: MOV TEMP[0].y, TEMP[4].xxxx 41: RCP TEMP[3].x, CONST[0].xxxx 42: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 43: FRC TEMP[5].z, TEMP[3].xxxx 44: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 45: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 46: MOV TEMP[6].x, -TEMP[3].xxxx 47: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 48: UIF TEMP[5].xxxx :0 49: MOV TEMP[5].x, TEMP[3].xxxx 50: ELSE :0 51: MOV TEMP[5].x, TEMP[6].xxxx 52: ENDIF 53: MOV TEMP[0].z, TEMP[5].xxxx 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 56: FRC TEMP[4].w, TEMP[4].wwww 57: MOV TEMP[0].w, TEMP[4].wwww 58: RCP TEMP[0].x, TEMP[3].xxxx 59: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 60: MOV TEMP[2].xy, TEMP[3].xyxx 61: FRC TEMP[3].xy, TEMP[2] 62: MOV TEMP[0].xy, TEMP[3].xyxx 63: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 64: MOV TEMP[0].xy, TEMP[2].xyxx 65: ADD TEMP[2].xy, TEMP[1], TEMP[0] 66: MOV TEMP[0].xy, TEMP[2].xyxx 67: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: FRC TEMP[2].zw, TEMP[2].xyxy 70: MOV TEMP[0].zw, TEMP[2].wwzw 71: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 74: MOV TEMP[0].xy, TEMP[2].xyxx 75: RCP TEMP[1].x, CONST[3].xxxx 76: RCP TEMP[2].x, CONST[3].yyyy 77: MOV TEMP[1].y, TEMP[2].xxxx 78: MUL TEMP[2].xy, TEMP[0], TEMP[1] 79: MOV TEMP[2].xy, TEMP[2].xyyy 80: MOV TEMP[2].w, IMM[0].zzzz 81: TXL TEMP[2], TEMP[2], SAMP[1], 2D 82: MOV TEMP[0].xyz, TEMP[2] 83: MOV TEMP[1].x, TEMP[2].wwww 84: ELSE :0 85: MOV TEMP[2].xy, IN[1].xyyy 86: TEX TEMP[2], TEMP[2], SAMP[1], 2D 87: MOV TEMP[0].xyz, TEMP[2] 88: MOV TEMP[1].x, TEMP[2].wwww 89: ENDIF 90: MUL TEMP[1].w, TEMP[1].xxxx, IN[0].wwww 91: MOV TEMP[0].w, TEMP[1].wwww 92: MOV OUT[0], TEMP[0] 93: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = fcmp olt float 5.000000e-01, %24 %42 = sext i1 %41 to i32 %43 = bitcast i32 %42 to float %44 = bitcast float %43 to i32 %45 = icmp ne i32 %44, 0 br i1 %45, label %IF, label %ELSE IF: ; preds = %main_body %46 = fmul float %26, %39 %47 = fmul float %27, %40 %48 = call float @llvm.AMDIL.fraction.(float %46) %49 = call float @llvm.AMDIL.fraction.(float %47) %50 = fsub float -0.000000e+00, %48 %51 = fadd float %46, %50 %52 = fsub float -0.000000e+00, %49 %53 = fadd float %47, %52 %54 = fsub float -0.000000e+00, %51 %55 = fmul float %39, %26 %56 = fadd float %55, %54 %57 = fsub float -0.000000e+00, %53 %58 = fmul float %40, %27 %59 = fadd float %58, %57 %60 = fadd float %51, 5.000000e-01 %61 = fadd float %53, 5.000000e-01 %62 = fdiv float 1.000000e+00, %26 %63 = fdiv float 1.000000e+00, %27 %64 = fmul float %60, %62 %65 = fmul float %61, %63 %66 = bitcast float %64 to i32 %67 = bitcast float %65 to i32 %68 = bitcast float 0.000000e+00 to i32 %69 = insertelement <4 x i32> undef, i32 %66, i32 0 %70 = insertelement <4 x i32> %69, i32 %67, i32 1 %71 = insertelement <4 x i32> %70, i32 %68, i32 2 %72 = insertelement <4 x i32> %71, i32 undef, i32 3 %73 = bitcast <8 x i32> %31 to <32 x i8> %74 = bitcast <4 x i32> %33 to <16 x i8> %75 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %72, <32 x i8> %73, <16 x i8> %74, i32 2) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 3 %78 = fsub float -0.000000e+00, %25 %79 = fadd float 1.024000e+03, %78 %80 = fmul float %77, 6.528000e+04 %81 = fmul float %76, 2.550000e+02 %82 = fadd float %80, %81 %83 = fadd float %82, 5.000000e-01 %84 = call float @llvm.AMDIL.fraction.(float %83) %85 = fsub float -0.000000e+00, %84 %86 = fadd float %85, %83 %87 = fmul float %25, %76 %88 = fadd float %87, 5.000000e-01 %89 = call float @llvm.AMDIL.fraction.(float %88) %90 = fsub float -0.000000e+00, %89 %91 = fadd float %88, %90 %92 = fadd float %86, 5.000000e-01 %93 = fadd float %91, 5.000000e-01 %94 = fcmp oge float %79, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %95 to float %97 = bitcast float %96 to i32 %98 = icmp ne i32 %97, 0 %. = select i1 %98, float %93, float %92 %99 = fdiv float 1.000000e+00, %24 %100 = fmul float %28, %99 %101 = fadd float %100, 5.000000e-01 %102 = call float @llvm.AMDIL.fraction.(float %101) %103 = fsub float -0.000000e+00, %102 %104 = fadd float %103, %101 %105 = fmul float %104, %. %106 = fsub float -0.000000e+00, %104 %107 = fcmp oge float %105, 0.000000e+00 %108 = sext i1 %107 to i32 %109 = bitcast i32 %108 to float %110 = bitcast float %109 to i32 %111 = icmp ne i32 %110, 0 %temp20.0 = select i1 %111, float %104, float %106 %112 = fdiv float 1.000000e+00, %temp20.0 %113 = fmul float %112, %. %114 = call float @llvm.AMDIL.fraction.(float %113) %115 = fdiv float 1.000000e+00, %104 %116 = fmul float %114, %temp20.0 %117 = fmul float %115, %. %118 = call float @llvm.AMDIL.fraction.(float %116) %119 = call float @llvm.AMDIL.fraction.(float %117) %120 = fsub float -0.000000e+00, %118 %121 = fadd float %120, %116 %122 = fsub float -0.000000e+00, %119 %123 = fadd float %122, %117 %124 = fadd float %56, %121 %125 = fadd float %59, %123 %126 = fmul float %124, %24 %127 = fmul float %125, %24 %128 = call float @llvm.AMDIL.fraction.(float %126) %129 = call float @llvm.AMDIL.fraction.(float %127) %130 = fsub float -0.000000e+00, %128 %131 = fadd float %130, %126 %132 = fsub float -0.000000e+00, %129 %133 = fadd float %132, %127 %134 = fadd float %131, 5.000000e-01 %135 = fadd float %133, 5.000000e-01 %136 = fdiv float 1.000000e+00, %28 %137 = fdiv float 1.000000e+00, %29 %138 = fmul float %134, %136 %139 = fmul float %135, %137 %140 = bitcast float %138 to i32 %141 = bitcast float %139 to i32 %142 = bitcast float 0.000000e+00 to i32 %143 = insertelement <4 x i32> undef, i32 %140, i32 0 %144 = insertelement <4 x i32> %143, i32 %141, i32 1 %145 = insertelement <4 x i32> %144, i32 %142, i32 2 %146 = insertelement <4 x i32> %145, i32 undef, i32 3 %147 = bitcast <8 x i32> %35 to <32 x i8> %148 = bitcast <4 x i32> %37 to <16 x i8> %149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2) %150 = extractelement <4 x float> %149, i32 0 %151 = extractelement <4 x float> %149, i32 1 %152 = extractelement <4 x float> %149, i32 2 br label %ENDIF ELSE: ; preds = %main_body %153 = bitcast float %39 to i32 %154 = bitcast float %40 to i32 %155 = insertelement <2 x i32> undef, i32 %153, i32 0 %156 = insertelement <2 x i32> %155, i32 %154, i32 1 %157 = bitcast <8 x i32> %35 to <32 x i8> %158 = bitcast <4 x i32> %37 to <16 x i8> %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %159, %ELSE ], [ %149, %IF ] %temp1.0 = phi float [ %151, %IF ], [ %161, %ELSE ] %temp2.0 = phi float [ %152, %IF ], [ %162, %ELSE ] %temp.0 = phi float [ %150, %IF ], [ %160, %ELSE ] %163 = extractelement <4 x float> %.sink, i32 3 %164 = fmul float %163, %38 %165 = call i32 @llvm.SI.packf16(float %temp.0, float %temp1.0) %166 = bitcast i32 %165 to float %167 = call i32 @llvm.SI.packf16(float %temp2.0, float %164) %168 = bitcast i32 %167 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %166, float %168, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v4, v0, 1, 1, [m0] ; C8100500 V_INTERP_P2_F32 v4, [v4], v1, 1, 1, [m0] ; C8110501 V_INTERP_P1_F32 v3, v0, 0, 1, [m0] ; C80C0400 V_INTERP_P2_F32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s20, s[0:3], 0x0 ; C20A0100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[22:23], s20, 5.000000e-01, 0, 0 ; D0080016 0001E014 V_CNDMASK_B32_e64 v0, 0, -1, s[22:23], 0, 0, 0, 0 ; D2000000 00598280 V_CMP_EQ_I32_e64 s[22:23], v0, 0, 0, 0 ; D1040016 00010100 S_AND_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962416 S_XOR_B64 s[22:23], exec, s[22:23] ; 8996167E IMAGE_SAMPLE v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[8:11] ; F0800F00 00430503 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962516 S_XOR_B64 exec, exec, s[22:23] ; 89FE167E S_CBRANCH_EXECZ BB0_4 ; BF880000 S_BUFFER_LOAD_DWORD s21, s[0:3], 0xd ; C20A810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_BUFFER_LOAD_DWORD s25, s[0:3], 0x9 ; C20C8109 S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_BUFFER_LOAD_DWORD s27, s[0:3], 0x4 ; C20D8104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s21 ; 7E000215 V_MOV_B32_e32 v1, s24 ; 7E020218 V_MOV_B32_e32 v5, s25 ; 7E0A0219 V_MOV_B32_e32 v6, s26 ; 7E0C021A V_MOV_B32_e32 v7, s27 ; 7E0E021B V_MOV_B32_e32 v8, s20 ; 7E100214 V_MUL_F32_e32 v9, v4, v5 ; 10120B04 V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v10, v9, v10 ; 08141509 V_ADD_F32_e32 v11, 5.000000e-01, v10 ; 061614F0 V_RCP_F32_e32 v5, v5 ; 7E0A5505 V_MUL_F32_e32 v12, v11, v5 ; 10180B0B V_MUL_F32_e32 v3, v3, v6 ; 10060D03 V_FRACT_F32_e32 v4, v3 ; 7E084103 V_SUB_F32_e32 v4, v3, v4 ; 08080903 V_ADD_F32_e32 v5, 5.000000e-01, v4 ; 060A08F0 V_RCP_F32_e32 v6, v6 ; 7E0C5506 V_MUL_F32_e32 v11, v5, v6 ; 10160D05 V_MOV_B32_e32 v13, 0 ; 7E1A0280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[5:6], 9, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[28:35], s[24:27] ; F0900900 00C7050B V_MOV_B32_e32 v15, 2.550000e+02 ; 7E1E02FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v15, v5, v15 ; 101E1F05 V_MOV_B32_e32 v16, 6.528000e+04 ; 7E2002FF 477F0000 V_MAD_F32 v15, v6, v16, v15, 0, 0 ; D282000F 043E2106 V_ADD_F32_e32 v15, 5.000000e-01, v15 ; 061E1EF0 V_FRACT_F32_e32 v16, v15 ; 7E20410F V_SUB_F32_e32 v15, v15, v16 ; 081E210F V_ADD_F32_e32 v15, 5.000000e-01, v15 ; 061E1EF0 V_MAD_F32 v5, v7, v5, 5.000000e-01, 0, 0 ; D2820005 03C20B07 V_FRACT_F32_e32 v6, v5 ; 7E0C4105 V_SUB_F32_e32 v5, v5, v6 ; 080A0D05 V_ADD_F32_e32 v5, 5.000000e-01, v5 ; 060A0AF0 V_SUB_F32_e32 v6, 1.024000e+03, v7 ; 080C0EFF 44800000 V_CMP_GE_F32_e64 s[24:25], v6, 0.000000e+00, 0, 0 ; D00C0018 00010106 V_CNDMASK_B32_e64 v5, v15, v5, s[24:25], 0, 0, 0, 0 ; D2000005 00620B0F V_RCP_F32_e32 v6, v8 ; 7E0C5508 V_MAD_F32 v6, v1, v6, 5.000000e-01, 0, 0 ; D2820006 03C20D01 V_FRACT_F32_e32 v7, v6 ; 7E0E4106 V_SUB_F32_e32 v6, v6, v7 ; 080C0F06 V_MUL_F32_e32 v7, v6, v5 ; 100E0B06 V_CMP_GE_F32_e64 s[24:25], v7, 0.000000e+00, 0, 0 ; D00C0018 00010107 V_MOV_B32_e32 v7, 0x80000000 ; 7E0E02FF 80000000 V_XOR_B32_e32 v7, v6, v7 ; 3A0E0F06 V_CNDMASK_B32_e64 v7, v7, v6, s[24:25], 0, 0, 0, 0 ; D2000007 00620D07 V_RCP_F32_e32 v15, v7 ; 7E1E5507 V_MUL_F32_e32 v15, v15, v5 ; 101E0B0F V_FRACT_F32_e32 v15, v15 ; 7E1E410F V_MUL_F32_e32 v7, v15, v7 ; 100E0F0F V_FRACT_F32_e32 v15, v7 ; 7E1E4107 V_SUB_F32_e32 v7, v7, v15 ; 080E1F07 V_SUB_F32_e32 v3, v3, v4 ; 08060903 V_ADD_F32_e32 v3, v3, v7 ; 06060F03 V_MUL_F32_e32 v3, v3, v8 ; 10061103 V_FRACT_F32_e32 v4, v3 ; 7E084103 V_SUB_F32_e32 v3, v3, v4 ; 08060903 V_ADD_F32_e32 v3, 5.000000e-01, v3 ; 060606F0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v11, v3, v1 ; 10160303 V_RCP_F32_e32 v1, v6 ; 7E025506 V_MUL_F32_e32 v1, v1, v5 ; 10020B01 V_FRACT_F32_e32 v3, v1 ; 7E064101 V_SUB_F32_e32 v1, v1, v3 ; 08020701 V_SUB_F32_e32 v3, v9, v10 ; 08061509 V_ADD_F32_e32 v1, v3, v1 ; 06020303 V_MUL_F32_e32 v1, v1, v8 ; 10021101 V_FRACT_F32_e32 v3, v1 ; 7E064101 V_SUB_F32_e32 v1, v1, v3 ; 08020701 V_ADD_F32_e32 v1, 5.000000e-01, v1 ; 060202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v12, v1, v0 ; 10180101 IMAGE_SAMPLE_L v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[12:19], s[8:11] ; F0900F00 0043050B S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[22:23] ; 88FE167E V_CVT_PKRTZ_F16_F32_e32 v0, v5, v6 ; 5E000D05 V_MUL_F32_e32 v1, v8, v2 ; 10020508 V_CVT_PKRTZ_F16_F32_e32 v1, v7, v1 ; 5E020307 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[0].xy, TEMP[1].xyxx 84: MOV TEMP[0].zw, IMM[0].zzzz 85: MAD TEMP[1].xyw, TEMP[0], IMM[1].yzyy, IMM[1].wyww 86: MOV TEMP[2].xy, TEMP[1].xyyy 87: MOV TEMP[2].w, TEMP[1].wwww 88: TXL TEMP[2], TEMP[2], SAMP[1], 2D 89: MOV TEMP[0], TEMP[2] 90: ELSE :0 91: MAD TEMP[1].xy, IN[1], IMM[1].yzyy, IMM[1].wyww 92: MOV TEMP[1].xy, TEMP[1].xyyy 93: TEX TEMP[1], TEMP[1], SAMP[1], 2D 94: MOV TEMP[0], TEMP[1] 95: ENDIF 96: MUL TEMP[0], TEMP[0], IN[0] 97: MOV OUT[0], TEMP[0] 98: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = fmul float %146, 1.000000e+00 %149 = fadd float %148, 0.000000e+00 %150 = fmul float %147, -1.000000e+00 %151 = fadd float %150, 1.000000e+00 %152 = fmul float 0.000000e+00, 1.000000e+00 %153 = fadd float %152, 0.000000e+00 %154 = bitcast float %149 to i32 %155 = bitcast float %151 to i32 %156 = bitcast float %153 to i32 %157 = insertelement <4 x i32> undef, i32 %154, i32 0 %158 = insertelement <4 x i32> %157, i32 %155, i32 1 %159 = insertelement <4 x i32> %158, i32 %156, i32 2 %160 = insertelement <4 x i32> %159, i32 undef, i32 3 %161 = bitcast <8 x i32> %35 to <32 x i8> %162 = bitcast <4 x i32> %37 to <16 x i8> %163 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2) %164 = extractelement <4 x float> %163, i32 0 %165 = extractelement <4 x float> %163, i32 1 %166 = extractelement <4 x float> %163, i32 2 br label %ENDIF ELSE: ; preds = %main_body %167 = fmul float %42, 1.000000e+00 %168 = fadd float %167, 0.000000e+00 %169 = fmul float %43, -1.000000e+00 %170 = fadd float %169, 1.000000e+00 %171 = bitcast float %168 to i32 %172 = bitcast float %170 to i32 %173 = insertelement <2 x i32> undef, i32 %171, i32 0 %174 = insertelement <2 x i32> %173, i32 %172, i32 1 %175 = bitcast <8 x i32> %35 to <32 x i8> %176 = bitcast <4 x i32> %37 to <16 x i8> %177 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %174, <32 x i8> %175, <16 x i8> %176, i32 2) %178 = extractelement <4 x float> %177, i32 0 %179 = extractelement <4 x float> %177, i32 1 %180 = extractelement <4 x float> %177, i32 2 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %177, %ELSE ], [ %163, %IF ] %temp2.0 = phi float [ %166, %IF ], [ %180, %ELSE ] %temp1.0 = phi float [ %165, %IF ], [ %179, %ELSE ] %temp.0 = phi float [ %164, %IF ], [ %178, %ELSE ] %181 = extractelement <4 x float> %.sink, i32 3 %182 = fmul float %temp.0, %38 %183 = fmul float %temp1.0, %39 %184 = fmul float %temp2.0, %40 %185 = fmul float %181, %41 %186 = call i32 @llvm.SI.packf16(float %182, float %183) %187 = bitcast i32 %186 to float %188 = call i32 @llvm.SI.packf16(float %184, float %185) %189 = bitcast i32 %188 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %187, float %189, float %187, float %189) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v6, v0, 1, 1, [m0] ; C8180500 V_INTERP_P2_F32 v6, [v6], v1, 1, 1, [m0] ; C8190501 V_INTERP_P1_F32 v7, v0, 0, 1, [m0] ; C81C0400 V_INTERP_P2_F32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s20, s[0:3], 0x0 ; C20A0100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[22:23], s20, 5.000000e-01, 0, 0 ; D0080016 0001E014 V_CNDMASK_B32_e64 v0, 0, -1, s[22:23], 0, 0, 0, 0 ; D2000000 00598280 V_CMP_EQ_I32_e64 s[22:23], v0, 0, 0, 0 ; D1040016 00010100 S_AND_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962416 S_XOR_B64 s[22:23], exec, s[22:23] ; 8996167E V_SUB_F32_e32 v8, 1.000000e+00, v6 ; 08100CF2 IMAGE_SAMPLE v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[12:19], s[8:11] ; F0800F00 00430907 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[22:23], s[22:23] ; BE962516 S_XOR_B64 exec, exec, s[22:23] ; 89FE167E S_CBRANCH_EXECZ BB0_4 ; BF880000 S_BUFFER_LOAD_DWORD s21, s[0:3], 0xd ; C20A810D S_BUFFER_LOAD_DWORD s24, s[0:3], 0xc ; C20C010C S_BUFFER_LOAD_DWORD s25, s[0:3], 0x9 ; C20C8109 S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_BUFFER_LOAD_DWORD s27, s[0:3], 0x4 ; C20D8104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s21 ; 7E000215 V_MOV_B32_e32 v1, s24 ; 7E020218 V_MOV_B32_e32 v9, s25 ; 7E120219 V_MOV_B32_e32 v10, s26 ; 7E14021A V_MOV_B32_e32 v11, s27 ; 7E16021B V_MOV_B32_e32 v12, s20 ; 7E180214 V_MUL_F32_e32 v6, v6, v9 ; 100C1306 V_FRACT_F32_e32 v13, v6 ; 7E1A4106 V_SUB_F32_e32 v13, v6, v13 ; 081A1B06 V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v9, v9 ; 7E125509 V_MUL_F32_e32 v15, v14, v9 ; 101E130E V_MUL_F32_e32 v7, v7, v10 ; 100E1507 V_FRACT_F32_e32 v8, v7 ; 7E104107 V_SUB_F32_e32 v8, v7, v8 ; 08101107 V_ADD_F32_e32 v9, 5.000000e-01, v8 ; 061210F0 V_RCP_F32_e32 v10, v10 ; 7E14550A V_MUL_F32_e32 v14, v9, v10 ; 101C1509 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[9:10], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7090E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v10, v18, 5.000000e-01, 0, 0 ; D2820013 03C2250A V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v9, v18, v19, 0, 0 ; D2820012 044E2509 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v9, v11, v9, 5.000000e-01, 0, 0 ; D2820009 03C2130B V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_ADD_F32_e32 v9, 5.000000e-01, v9 ; 061212F0 V_SUB_F32_e32 v10, 1.024000e+03, v11 ; 081416FF 44800000 V_CMP_GE_F32_e64 s[24:25], v10, 0.000000e+00, 0, 0 ; D00C0018 0001010A V_CNDMASK_B32_e64 v9, v18, v9, s[24:25], 0, 0, 0, 0 ; D2000009 00621312 V_RCP_F32_e32 v10, v12 ; 7E14550C V_MAD_F32 v10, v1, v10, 5.000000e-01, 0, 0 ; D282000A 03C21501 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v9 ; 1016130A V_CMP_GE_F32_e64 s[24:25], v11, 0.000000e+00, 0, 0 ; D00C0018 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[24:25], 0, 0, 0, 0 ; D200000B 0062150B V_RCP_F32_e32 v18, v11 ; 7E24550B V_MUL_F32_e32 v18, v18, v9 ; 10241312 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v11, v18, v11 ; 10161712 V_FRACT_F32_e32 v18, v11 ; 7E24410B V_SUB_F32_e32 v11, v11, v18 ; 0816250B V_SUB_F32_e32 v7, v7, v8 ; 080E1107 V_ADD_F32_e32 v7, v7, v11 ; 060E1707 V_MUL_F32_e32 v7, v7, v12 ; 100E1907 V_FRACT_F32_e32 v8, v7 ; 7E104107 V_SUB_F32_e32 v7, v7, v8 ; 080E1107 V_ADD_F32_e32 v7, 5.000000e-01, v7 ; 060E0EF0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v14, v7, v1 ; 101C0307 V_RCP_F32_e32 v1, v10 ; 7E02550A V_MUL_F32_e32 v1, v1, v9 ; 10021301 V_FRACT_F32_e32 v7, v1 ; 7E0E4101 V_SUB_F32_e32 v1, v1, v7 ; 08020F01 V_SUB_F32_e32 v6, v6, v13 ; 080C1B06 V_ADD_F32_e32 v1, v6, v1 ; 06020306 V_MUL_F32_e32 v1, v1, v12 ; 10021901 V_FRACT_F32_e32 v6, v1 ; 7E0C4101 V_SUB_F32_e32 v1, v6, v1 ; 08020306 V_SUBREV_F32_e32 v1, 5.000000e-01, v1 ; 0A0202F0 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MAD_F32 v15, v1, v0, 1.000000e+00, 0, 0 ; D282000F 03CA0101 IMAGE_SAMPLE_L v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 0043090E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[22:23] ; 88FE167E V_MUL_F32_e32 v0, v10, v4 ; 1000090A V_MUL_F32_e32 v1, v9, v5 ; 10020B09 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v11, v3 ; 1002070B V_MUL_F32_e32 v2, v12, v2 ; 1004050C V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %273 = bitcast float %52 to i32 %274 = bitcast float %53 to i32 %275 = insertelement <2 x i32> undef, i32 %273, i32 0 %276 = insertelement <2 x i32> %275, i32 %274, i32 1 %277 = bitcast <8 x i32> %45 to <32 x i8> %278 = bitcast <4 x i32> %47 to <16 x i8> %279 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %276, <32 x i8> %277, <16 x i8> %278, i32 2) %280 = extractelement <4 x float> %279, i32 0 %281 = extractelement <4 x float> %279, i32 1 %282 = extractelement <4 x float> %279, i32 2 br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %279, %ELSE47 ], [ %269, %IF46 ] %temp12.0 = phi float [ %270, %IF46 ], [ %280, %ELSE47 ] %temp13.0 = phi float [ %271, %IF46 ], [ %281, %ELSE47 ] %temp14.0 = phi float [ %272, %IF46 ], [ %282, %ELSE47 ] %283 = extractelement <4 x float> %.sink, i32 3 %284 = fmul float %temp12.0, %48 %285 = fmul float %temp13.0, %49 %286 = fmul float %temp14.0, %50 %287 = fmul float %283, %51 %288 = call i32 @llvm.SI.packf16(float %284, float %285) %289 = bitcast i32 %288 to float %290 = call i32 @llvm.SI.packf16(float %286, float %287) %291 = bitcast i32 %290 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %289, float %291, float %289, float %291) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v9, v0, 1, 1, [m0] ; C8240500 V_INTERP_P2_F32 v9, [v9], v1, 1, 1, [m0] ; C8250501 V_INTERP_P1_F32 v8, v0, 0, 1, [m0] ; C8200400 V_INTERP_P2_F32 v8, [v8], v1, 0, 1, [m0] ; C8210401 V_INTERP_P1_F32 v4, v0, 3, 0, [m0] ; C8100300 V_INTERP_P2_F32 v4, [v4], v1, 3, 0, [m0] ; C8110301 V_INTERP_P1_F32 v5, v0, 2, 0, [m0] ; C8140200 V_INTERP_P2_F32 v5, [v5], v1, 2, 0, [m0] ; C8150201 V_INTERP_P1_F32 v6, v0, 1, 0, [m0] ; C8180100 V_INTERP_P2_F32 v6, [v6], v1, 1, 0, [m0] ; C8190101 V_INTERP_P1_F32 v7, v0, 0, 0, [m0] ; C81C0000 V_INTERP_P2_F32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x14 ; C2040114 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x16 ; C2048116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s9 ; 7E000209 V_MAD_F32 v0, v2, s8, v0, 0, 0 ; D2820000 04001102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s8, v0 ; 0A020008 V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[14:15], v1, 0, 0, 0 ; D10A000E 00010101 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x21 ; C2048121 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x20 ; C2050120 S_BUFFER_LOAD_DWORD s11, s[0:3], 0x17 ; C2058117 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x15 ; C2060115 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x2 ; C2068102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1 ; C2040101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_AND_SAVEEXEC_B64 s[14:15], s[14:15] ; BE8E240E S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3 ; C2040103 V_MOV_B32_e32 v1, s9 ; 7E020209 V_MOV_B32_e32 v10, s10 ; 7E14020A V_MOV_B32_e32 v2, s11 ; 7E04020B V_MOV_B32_e32 v12, s12 ; 7E18020C V_MOV_B32_e32 v11, s13 ; 7E16020D S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v0, v13, v0 ; 0800010D V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E S_BUFFER_LOAD_DWORD s9, s[0:3], 0x4 ; C2048104 V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MAD_F32 v0, v3, v10, v1, 0, 0 ; D2820000 04061503 V_MAD_F32 v1, v0, v12, v2, 0, 0 ; D2820001 040A1900 V_SUB_F32_e32 v0, v1, v11 ; 08001701 V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E V_MOV_B32_e32 v0, s9 ; 7E000209 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v1, v13, v1 ; 0802030D V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[8:9], v1, 0, 0, 0 ; D10A0008 00010101 S_AND_SAVEEXEC_B64 s[8:9], s[8:9] ; BE882408 S_XOR_B64 s[8:9], exec, s[8:9] ; 8988087E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[8:9] ; 88FE087E S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 V_CMP_GT_F32_e64 s[20:21], v0, 5.000000e-01, 0, 0 ; D0080014 0001E100 V_CNDMASK_B32_e64 v1, 0, -1, s[20:21], 0, 0, 0, 0 ; D2000001 00518280 V_CMP_EQ_I32_e64 s[20:21], v1, 0, 0, 0 ; D1040014 00010101 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942414 S_XOR_B64 s[20:21], exec, s[20:21] ; 8994147E IMAGE_SAMPLE v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[8:11] ; F0800F00 00430A08 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942514 S_XOR_B64 exec, exec, s[20:21] ; 89FE147E S_CBRANCH_EXECZ BB0_12 ; BF880000 S_BUFFER_LOAD_DWORD s22, s[0:3], 0x11 ; C20B0111 S_BUFFER_LOAD_DWORD s23, s[0:3], 0x10 ; C20B8110 S_BUFFER_LOAD_DWORD s24, s[0:3], 0xd ; C20C010D S_BUFFER_LOAD_DWORD s25, s[0:3], 0xc ; C20C810C S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s22 ; 7E020216 V_MOV_B32_e32 v2, s23 ; 7E040217 V_MOV_B32_e32 v3, s24 ; 7E060218 V_MOV_B32_e32 v10, s25 ; 7E140219 V_MOV_B32_e32 v11, s26 ; 7E16021A V_MUL_F32_e32 v12, v9, v3 ; 10180709 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MUL_F32_e32 v15, v14, v3 ; 101E070E V_MUL_F32_e32 v3, v8, v10 ; 10061508 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v8, v3, v8 ; 08101103 V_ADD_F32_e32 v9, 5.000000e-01, v8 ; 061210F0 V_RCP_F32_e32 v10, v10 ; 7E14550A V_MUL_F32_e32 v14, v9, v10 ; 101C1509 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[9:10], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7090E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v10, v18, 5.000000e-01, 0, 0 ; D2820013 03C2250A V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v9, v18, v19, 0, 0 ; D2820012 044E2509 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v9, v11, v9, 5.000000e-01, 0, 0 ; D2820009 03C2130B V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_ADD_F32_e32 v9, 5.000000e-01, v9 ; 061212F0 V_SUB_F32_e32 v10, 1.024000e+03, v11 ; 081416FF 44800000 V_CMP_GE_F32_e64 s[22:23], v10, 0.000000e+00, 0, 0 ; D00C0016 0001010A V_CNDMASK_B32_e64 v9, v18, v9, s[22:23], 0, 0, 0, 0 ; D2000009 005A1312 V_RCP_F32_e32 v10, v0 ; 7E145500 V_MAD_F32 v10, v2, v10, 5.000000e-01, 0, 0 ; D282000A 03C21502 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v9 ; 1016130A V_CMP_GE_F32_e64 s[22:23], v11, 0.000000e+00, 0, 0 ; D00C0016 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[22:23], 0, 0, 0, 0 ; D200000B 005A150B V_RCP_F32_e32 v18, v11 ; 7E24550B V_MUL_F32_e32 v18, v18, v9 ; 10241312 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v11, v18, v11 ; 10161712 V_FRACT_F32_e32 v18, v11 ; 7E24410B V_SUB_F32_e32 v11, v11, v18 ; 0816250B V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, v3, v11 ; 06061703 V_MUL_F32_e32 v3, v3, v0 ; 10060103 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, 5.000000e-01, v3 ; 060606F0 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v14, v3, v2 ; 101C0503 V_RCP_F32_e32 v2, v10 ; 7E04550A V_MUL_F32_e32 v2, v2, v9 ; 10041302 V_FRACT_F32_e32 v3, v2 ; 7E064102 V_SUB_F32_e32 v2, v2, v3 ; 08040702 V_SUB_F32_e32 v3, v12, v13 ; 08061B0C V_ADD_F32_e32 v2, v3, v2 ; 06040503 V_MUL_F32_e32 v0, v2, v0 ; 10000102 V_FRACT_F32_e32 v2, v0 ; 7E044100 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v15, v0, v1 ; 101E0300 IMAGE_SAMPLE_L v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 00430A0E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[20:21] ; 88FE147E V_MUL_F32_e32 v0, v11, v6 ; 10000D0B V_MUL_F32_e32 v1, v10, v7 ; 10020F0A V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v12, v5 ; 10020B0C V_MUL_F32_e32 v2, v13, v4 ; 1004090D V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %273 = bitcast float %52 to i32 %274 = bitcast float %53 to i32 %275 = insertelement <2 x i32> undef, i32 %273, i32 0 %276 = insertelement <2 x i32> %275, i32 %274, i32 1 %277 = bitcast <8 x i32> %45 to <32 x i8> %278 = bitcast <4 x i32> %47 to <16 x i8> %279 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %276, <32 x i8> %277, <16 x i8> %278, i32 2) %280 = extractelement <4 x float> %279, i32 0 %281 = extractelement <4 x float> %279, i32 1 %282 = extractelement <4 x float> %279, i32 2 br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %279, %ELSE47 ], [ %269, %IF46 ] %temp12.0 = phi float [ %270, %IF46 ], [ %280, %ELSE47 ] %temp13.0 = phi float [ %271, %IF46 ], [ %281, %ELSE47 ] %temp14.0 = phi float [ %272, %IF46 ], [ %282, %ELSE47 ] %283 = extractelement <4 x float> %.sink, i32 3 %284 = fmul float %temp12.0, %48 %285 = fmul float %temp13.0, %49 %286 = fmul float %temp14.0, %50 %287 = fmul float %283, %51 %288 = call i32 @llvm.SI.packf16(float %284, float %285) %289 = bitcast i32 %288 to float %290 = call i32 @llvm.SI.packf16(float %286, float %287) %291 = bitcast i32 %290 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %289, float %291, float %289, float %291) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v9, v0, 1, 1, [m0] ; C8240500 V_INTERP_P2_F32 v9, [v9], v1, 1, 1, [m0] ; C8250501 V_INTERP_P1_F32 v8, v0, 0, 1, [m0] ; C8200400 V_INTERP_P2_F32 v8, [v8], v1, 0, 1, [m0] ; C8210401 V_INTERP_P1_F32 v4, v0, 3, 0, [m0] ; C8100300 V_INTERP_P2_F32 v4, [v4], v1, 3, 0, [m0] ; C8110301 V_INTERP_P1_F32 v5, v0, 2, 0, [m0] ; C8140200 V_INTERP_P2_F32 v5, [v5], v1, 2, 0, [m0] ; C8150201 V_INTERP_P1_F32 v6, v0, 1, 0, [m0] ; C8180100 V_INTERP_P2_F32 v6, [v6], v1, 1, 0, [m0] ; C8190101 V_INTERP_P1_F32 v7, v0, 0, 0, [m0] ; C81C0000 V_INTERP_P2_F32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x14 ; C2040114 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x16 ; C2048116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s9 ; 7E000209 V_MAD_F32 v0, v2, s8, v0, 0, 0 ; D2820000 04001102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s8, v0 ; 0A020008 V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[14:15], v1, 0, 0, 0 ; D10A000E 00010101 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x21 ; C2048121 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x20 ; C2050120 S_BUFFER_LOAD_DWORD s11, s[0:3], 0x17 ; C2058117 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x15 ; C2060115 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x2 ; C2068102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1 ; C2040101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_AND_SAVEEXEC_B64 s[14:15], s[14:15] ; BE8E240E S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3 ; C2040103 V_MOV_B32_e32 v1, s9 ; 7E020209 V_MOV_B32_e32 v10, s10 ; 7E14020A V_MOV_B32_e32 v2, s11 ; 7E04020B V_MOV_B32_e32 v12, s12 ; 7E18020C V_MOV_B32_e32 v11, s13 ; 7E16020D S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v0, v13, v0 ; 0800010D V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E S_BUFFER_LOAD_DWORD s9, s[0:3], 0x4 ; C2048104 V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MAD_F32 v0, v3, v10, v1, 0, 0 ; D2820000 04061503 V_MAD_F32 v1, v0, v12, v2, 0, 0 ; D2820001 040A1900 V_SUB_F32_e32 v0, v1, v11 ; 08001701 V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E V_MOV_B32_e32 v0, s9 ; 7E000209 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v1, v13, v1 ; 0802030D V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[8:9], v1, 0, 0, 0 ; D10A0008 00010101 S_AND_SAVEEXEC_B64 s[8:9], s[8:9] ; BE882408 S_XOR_B64 s[8:9], exec, s[8:9] ; 8988087E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[8:9] ; 88FE087E S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 V_CMP_GT_F32_e64 s[20:21], v0, 5.000000e-01, 0, 0 ; D0080014 0001E100 V_CNDMASK_B32_e64 v1, 0, -1, s[20:21], 0, 0, 0, 0 ; D2000001 00518280 V_CMP_EQ_I32_e64 s[20:21], v1, 0, 0, 0 ; D1040014 00010101 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942414 S_XOR_B64 s[20:21], exec, s[20:21] ; 8994147E IMAGE_SAMPLE v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[8:11] ; F0800F00 00430A08 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942514 S_XOR_B64 exec, exec, s[20:21] ; 89FE147E S_CBRANCH_EXECZ BB0_12 ; BF880000 S_BUFFER_LOAD_DWORD s22, s[0:3], 0x11 ; C20B0111 S_BUFFER_LOAD_DWORD s23, s[0:3], 0x10 ; C20B8110 S_BUFFER_LOAD_DWORD s24, s[0:3], 0xd ; C20C010D S_BUFFER_LOAD_DWORD s25, s[0:3], 0xc ; C20C810C S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s22 ; 7E020216 V_MOV_B32_e32 v2, s23 ; 7E040217 V_MOV_B32_e32 v3, s24 ; 7E060218 V_MOV_B32_e32 v10, s25 ; 7E140219 V_MOV_B32_e32 v11, s26 ; 7E16021A V_MUL_F32_e32 v12, v9, v3 ; 10180709 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MUL_F32_e32 v15, v14, v3 ; 101E070E V_MUL_F32_e32 v3, v8, v10 ; 10061508 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v8, v3, v8 ; 08101103 V_ADD_F32_e32 v9, 5.000000e-01, v8 ; 061210F0 V_RCP_F32_e32 v10, v10 ; 7E14550A V_MUL_F32_e32 v14, v9, v10 ; 101C1509 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[9:10], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7090E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v10, v18, 5.000000e-01, 0, 0 ; D2820013 03C2250A V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v9, v18, v19, 0, 0 ; D2820012 044E2509 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v9, v11, v9, 5.000000e-01, 0, 0 ; D2820009 03C2130B V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_ADD_F32_e32 v9, 5.000000e-01, v9 ; 061212F0 V_SUB_F32_e32 v10, 1.024000e+03, v11 ; 081416FF 44800000 V_CMP_GE_F32_e64 s[22:23], v10, 0.000000e+00, 0, 0 ; D00C0016 0001010A V_CNDMASK_B32_e64 v9, v18, v9, s[22:23], 0, 0, 0, 0 ; D2000009 005A1312 V_RCP_F32_e32 v10, v0 ; 7E145500 V_MAD_F32 v10, v2, v10, 5.000000e-01, 0, 0 ; D282000A 03C21502 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v9 ; 1016130A V_CMP_GE_F32_e64 s[22:23], v11, 0.000000e+00, 0, 0 ; D00C0016 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[22:23], 0, 0, 0, 0 ; D200000B 005A150B V_RCP_F32_e32 v18, v11 ; 7E24550B V_MUL_F32_e32 v18, v18, v9 ; 10241312 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v11, v18, v11 ; 10161712 V_FRACT_F32_e32 v18, v11 ; 7E24410B V_SUB_F32_e32 v11, v11, v18 ; 0816250B V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, v3, v11 ; 06061703 V_MUL_F32_e32 v3, v3, v0 ; 10060103 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, 5.000000e-01, v3 ; 060606F0 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v14, v3, v2 ; 101C0503 V_RCP_F32_e32 v2, v10 ; 7E04550A V_MUL_F32_e32 v2, v2, v9 ; 10041302 V_FRACT_F32_e32 v3, v2 ; 7E064102 V_SUB_F32_e32 v2, v2, v3 ; 08040702 V_SUB_F32_e32 v3, v12, v13 ; 08061B0C V_ADD_F32_e32 v2, v3, v2 ; 06040503 V_MUL_F32_e32 v0, v2, v0 ; 10000102 V_FRACT_F32_e32 v2, v0 ; 7E044100 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v15, v0, v1 ; 101E0300 IMAGE_SAMPLE_L v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 00430A0E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[20:21] ; 88FE147E V_MUL_F32_e32 v0, v11, v6 ; 10000D0B V_MUL_F32_e32 v1, v10, v7 ; 10020F0A V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v12, v5 ; 10020B0C V_MUL_F32_e32 v2, v13, v4 ; 1004090D V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xa ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v3 ; 100A0604 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s4, v2 ; 100C0404 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s4, v1 ; 100E0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xb ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v4 ; 10020804 S_BUFFER_LOAD_DWORD s4, s[0:3], 0xf ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v1 ; 10020204 EXP 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x7 ; C2020107 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v1, v1, v7, v6, 0, 0 ; D2820001 041A0F01 V_MOV_B32_e32 v2, 1.000000e+00 ; 7E0402F2 V_MOV_B32_e32 v3, 0.000000e+00 ; 7E060280 EXP 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x2 ; C2020102 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_MOV_B32_e32 v4, s4 ; 7E080204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v0, v5, v4, 0, 0 ; D2820004 04120B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v0, v1, v6, v5, 0, 0 ; D2820000 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1b ; C202011B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s4, v0 ; 10020004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x17 ; C2020117 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v4, s4, v1, 0, 0 ; D2820001 04040904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x23 ; C2020123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s4, v1 ; 06020204 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x10 ; C2020110 S_BUFFER_LOAD_DWORD s5, s[0:3], 0x11 ; C2028111 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s5 ; 7E040205 V_ADD_F32_e32 v2, s4, v2 ; 06040404 V_MUL_F32_e32 v1, v1, v2 ; 10020501 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x1a ; C202011A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v0 ; 10060004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x16 ; C2020116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v4, s4, v3, 0, 0 ; D2820003 040C0904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x22 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v3, s4, v3 ; 06060604 V_MUL_F32_e32 v3, v3, v2 ; 10060503 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x19 ; C2020119 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v0 ; 100A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x15 ; C2020115 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, v4, s4, v5, 0, 0 ; D2820005 04140904 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x21 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s4, v5 ; 060A0A04 V_MUL_F32_e32 v5, v5, v2 ; 100A0505 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x18 ; C2020118 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s4, v0 ; 10000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 0x14 ; C2020114 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v4, s4, v0, 0, 0 ; D2820000 04000904 S_BUFFER_LOAD_DWORD s0, s[0:3], 0x20 ; C2000120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v0, v0, v2 ; 10000500 EXP 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [32 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [16 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %273 = bitcast float %52 to i32 %274 = bitcast float %53 to i32 %275 = insertelement <2 x i32> undef, i32 %273, i32 0 %276 = insertelement <2 x i32> %275, i32 %274, i32 1 %277 = bitcast <8 x i32> %45 to <32 x i8> %278 = bitcast <4 x i32> %47 to <16 x i8> %279 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %276, <32 x i8> %277, <16 x i8> %278, i32 2) %280 = extractelement <4 x float> %279, i32 0 %281 = extractelement <4 x float> %279, i32 1 %282 = extractelement <4 x float> %279, i32 2 br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %279, %ELSE47 ], [ %269, %IF46 ] %temp12.0 = phi float [ %270, %IF46 ], [ %280, %ELSE47 ] %temp13.0 = phi float [ %271, %IF46 ], [ %281, %ELSE47 ] %temp14.0 = phi float [ %272, %IF46 ], [ %282, %ELSE47 ] %283 = extractelement <4 x float> %.sink, i32 3 %284 = fmul float %temp12.0, %48 %285 = fmul float %temp13.0, %49 %286 = fmul float %temp14.0, %50 %287 = fmul float %283, %51 %288 = call i32 @llvm.SI.packf16(float %284, float %285) %289 = bitcast i32 %288 to float %290 = call i32 @llvm.SI.packf16(float %286, float %287) %291 = bitcast i32 %290 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %289, float %291, float %289, float %291) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v9, v0, 1, 1, [m0] ; C8240500 V_INTERP_P2_F32 v9, [v9], v1, 1, 1, [m0] ; C8250501 V_INTERP_P1_F32 v8, v0, 0, 1, [m0] ; C8200400 V_INTERP_P2_F32 v8, [v8], v1, 0, 1, [m0] ; C8210401 V_INTERP_P1_F32 v4, v0, 3, 0, [m0] ; C8100300 V_INTERP_P2_F32 v4, [v4], v1, 3, 0, [m0] ; C8110301 V_INTERP_P1_F32 v5, v0, 2, 0, [m0] ; C8140200 V_INTERP_P2_F32 v5, [v5], v1, 2, 0, [m0] ; C8150201 V_INTERP_P1_F32 v6, v0, 1, 0, [m0] ; C8180100 V_INTERP_P2_F32 v6, [v6], v1, 1, 0, [m0] ; C8190101 V_INTERP_P1_F32 v7, v0, 0, 0, [m0] ; C81C0000 V_INTERP_P2_F32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 0x14 ; C2040114 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x16 ; C2048116 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s9 ; 7E000209 V_MAD_F32 v0, v2, s8, v0, 0, 0 ; D2820000 04001102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x0 ; C2040100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_SUBREV_F32_e32 v1, s8, v0 ; 0A020008 V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[14:15], v1, 0, 0, 0 ; D10A000E 00010101 S_BUFFER_LOAD_DWORD s9, s[0:3], 0x21 ; C2048121 S_BUFFER_LOAD_DWORD s10, s[0:3], 0x20 ; C2050120 S_BUFFER_LOAD_DWORD s11, s[0:3], 0x17 ; C2058117 S_BUFFER_LOAD_DWORD s12, s[0:3], 0x15 ; C2060115 S_BUFFER_LOAD_DWORD s13, s[0:3], 0x2 ; C2068102 S_BUFFER_LOAD_DWORD s8, s[0:3], 0x1 ; C2040101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_AND_SAVEEXEC_B64 s[14:15], s[14:15] ; BE8E240E S_XOR_B64 s[14:15], exec, s[14:15] ; 898E0E7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[14:15] ; 88FE0E7E S_BUFFER_LOAD_DWORD s8, s[0:3], 0x3 ; C2040103 V_MOV_B32_e32 v1, s9 ; 7E020209 V_MOV_B32_e32 v10, s10 ; 7E14020A V_MOV_B32_e32 v2, s11 ; 7E04020B V_MOV_B32_e32 v12, s12 ; 7E18020C V_MOV_B32_e32 v11, s13 ; 7E16020D S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v0, v13, v0 ; 0800010D V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E S_BUFFER_LOAD_DWORD s9, s[0:3], 0x4 ; C2048104 V_MOV_B32_e32 v13, s8 ; 7E1A0208 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_MAD_F32 v0, v3, v10, v1, 0, 0 ; D2820000 04061503 V_MAD_F32 v1, v0, v12, v2, 0, 0 ; D2820001 040A1900 V_SUB_F32_e32 v0, v1, v11 ; 08001701 V_CMP_LT_F32_e64 s[10:11], v0, 0.000000e+00, 0, 0 ; D002000A 00010100 V_CNDMASK_B32_e64 v0, 0, -1, s[10:11], 0, 0, 0, 0 ; D2000000 00298280 V_OR_B32_e32 v0, v0, v0 ; 38000100 V_CMP_NE_I32_e64 s[10:11], v0, 0, 0, 0 ; D10A000A 00010100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[10:11], s[10:11] ; BE8A240A S_XOR_B64 s[10:11], exec, s[10:11] ; 898A0A7E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[10:11] ; 88FE0A7E V_MOV_B32_e32 v0, s9 ; 7E000209 S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 V_SUB_F32_e32 v1, v13, v1 ; 0802030D V_CMP_LT_F32_e64 s[8:9], v1, 0.000000e+00, 0, 0 ; D0020008 00010101 V_CNDMASK_B32_e64 v1, 0, -1, s[8:9], 0, 0, 0, 0 ; D2000001 00218280 V_OR_B32_e32 v1, v1, v1 ; 38020301 V_CMP_NE_I32_e64 s[8:9], v1, 0, 0, 0 ; D10A0008 00010101 S_AND_SAVEEXEC_B64 s[8:9], s[8:9] ; BE882408 S_XOR_B64 s[8:9], exec, s[8:9] ; 8988087E S_MOV_B64 exec, 0 ; BEFE0480 S_OR_B64 exec, exec, s[8:9] ; 88FE087E S_CBRANCH_EXECNZ 3 ; BF890003 EXP 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 S_ENDPGM ; BF810000 S_LOAD_DWORDX4 s[8:11], s[4:5], 0x4 ; C0840504 S_LOAD_DWORDX8 s[12:19], s[6:7], 0x8 ; C0C60708 V_CMP_GT_F32_e64 s[20:21], v0, 5.000000e-01, 0, 0 ; D0080014 0001E100 V_CNDMASK_B32_e64 v1, 0, -1, s[20:21], 0, 0, 0, 0 ; D2000001 00518280 V_CMP_EQ_I32_e64 s[20:21], v1, 0, 0, 0 ; D1040014 00010101 S_WAITCNT lgkmcnt(0) ; BF8C007F S_AND_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942414 S_XOR_B64 s[20:21], exec, s[20:21] ; 8994147E IMAGE_SAMPLE v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[8:11] ; F0800F00 00430A08 S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_SAVEEXEC_B64 s[20:21], s[20:21] ; BE942514 S_XOR_B64 exec, exec, s[20:21] ; 89FE147E S_CBRANCH_EXECZ BB0_12 ; BF880000 S_BUFFER_LOAD_DWORD s22, s[0:3], 0x11 ; C20B0111 S_BUFFER_LOAD_DWORD s23, s[0:3], 0x10 ; C20B8110 S_BUFFER_LOAD_DWORD s24, s[0:3], 0xd ; C20C010D S_BUFFER_LOAD_DWORD s25, s[0:3], 0xc ; C20C810C S_BUFFER_LOAD_DWORD s26, s[0:3], 0x8 ; C20D0108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s22 ; 7E020216 V_MOV_B32_e32 v2, s23 ; 7E040217 V_MOV_B32_e32 v3, s24 ; 7E060218 V_MOV_B32_e32 v10, s25 ; 7E140219 V_MOV_B32_e32 v11, s26 ; 7E16021A V_MUL_F32_e32 v12, v9, v3 ; 10180709 V_FRACT_F32_e32 v13, v12 ; 7E1A410C V_SUB_F32_e32 v13, v12, v13 ; 081A1B0C V_ADD_F32_e32 v14, 5.000000e-01, v13 ; 061C1AF0 V_RCP_F32_e32 v3, v3 ; 7E065503 V_MUL_F32_e32 v15, v14, v3 ; 101E070E V_MUL_F32_e32 v3, v8, v10 ; 10061508 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v8, v3, v8 ; 08101103 V_ADD_F32_e32 v9, 5.000000e-01, v8 ; 061210F0 V_RCP_F32_e32 v10, v10 ; 7E14550A V_MUL_F32_e32 v14, v9, v10 ; 101C1509 V_MOV_B32_e32 v16, 0 ; 7E200280 S_LOAD_DWORDX4 s[24:27], s[4:5], 0x0 ; C08C0500 S_LOAD_DWORDX8 s[28:35], s[6:7], 0x0 ; C0CE0700 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE_L v[9:10], 9, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[28:35], s[24:27] ; F0900900 00C7090E V_MOV_B32_e32 v18, 2.550000e+02 ; 7E2402FF 437F0000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v19, v10, v18, 5.000000e-01, 0, 0 ; D2820013 03C2250A V_FRACT_F32_e32 v20, v19 ; 7E284113 V_SUB_F32_e32 v19, v19, v20 ; 08262913 V_MUL_F32_e32 v19, 2.560000e+02, v19 ; 102626FF 43800000 V_MAD_F32 v18, v9, v18, v19, 0, 0 ; D2820012 044E2509 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_FRACT_F32_e32 v19, v18 ; 7E264112 V_SUB_F32_e32 v18, v18, v19 ; 08242712 V_ADD_F32_e32 v18, 5.000000e-01, v18 ; 062424F0 V_MAD_F32 v9, v11, v9, 5.000000e-01, 0, 0 ; D2820009 03C2130B V_FRACT_F32_e32 v10, v9 ; 7E144109 V_SUB_F32_e32 v9, v9, v10 ; 08121509 V_ADD_F32_e32 v9, 5.000000e-01, v9 ; 061212F0 V_SUB_F32_e32 v10, 1.024000e+03, v11 ; 081416FF 44800000 V_CMP_GE_F32_e64 s[22:23], v10, 0.000000e+00, 0, 0 ; D00C0016 0001010A V_CNDMASK_B32_e64 v9, v18, v9, s[22:23], 0, 0, 0, 0 ; D2000009 005A1312 V_RCP_F32_e32 v10, v0 ; 7E145500 V_MAD_F32 v10, v2, v10, 5.000000e-01, 0, 0 ; D282000A 03C21502 V_FRACT_F32_e32 v11, v10 ; 7E16410A V_SUB_F32_e32 v10, v10, v11 ; 0814170A V_MUL_F32_e32 v11, v10, v9 ; 1016130A V_CMP_GE_F32_e64 s[22:23], v11, 0.000000e+00, 0, 0 ; D00C0016 0001010B V_MOV_B32_e32 v11, 0x80000000 ; 7E1602FF 80000000 V_XOR_B32_e32 v11, v10, v11 ; 3A16170A V_CNDMASK_B32_e64 v11, v11, v10, s[22:23], 0, 0, 0, 0 ; D200000B 005A150B V_RCP_F32_e32 v18, v11 ; 7E24550B V_MUL_F32_e32 v18, v18, v9 ; 10241312 V_FRACT_F32_e32 v18, v18 ; 7E244112 V_MUL_F32_e32 v11, v18, v11 ; 10161712 V_FRACT_F32_e32 v18, v11 ; 7E24410B V_SUB_F32_e32 v11, v11, v18 ; 0816250B V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, v3, v11 ; 06061703 V_MUL_F32_e32 v3, v3, v0 ; 10060103 V_FRACT_F32_e32 v8, v3 ; 7E104103 V_SUB_F32_e32 v3, v3, v8 ; 08061103 V_ADD_F32_e32 v3, 5.000000e-01, v3 ; 060606F0 V_RCP_F32_e32 v2, v2 ; 7E045502 V_MUL_F32_e32 v14, v3, v2 ; 101C0503 V_RCP_F32_e32 v2, v10 ; 7E04550A V_MUL_F32_e32 v2, v2, v9 ; 10041302 V_FRACT_F32_e32 v3, v2 ; 7E064102 V_SUB_F32_e32 v2, v2, v3 ; 08040702 V_SUB_F32_e32 v3, v12, v13 ; 08061B0C V_ADD_F32_e32 v2, v3, v2 ; 06040503 V_MUL_F32_e32 v0, v2, v0 ; 10000102 V_FRACT_F32_e32 v2, v0 ; 7E044100 V_SUB_F32_e32 v0, v0, v2 ; 08000500 V_ADD_F32_e32 v0, 5.000000e-01, v0 ; 060000F0 V_RCP_F32_e32 v1, v1 ; 7E025501 V_MUL_F32_e32 v15, v0, v1 ; 101E0300 IMAGE_SAMPLE_L v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[12:19], s[8:11] ; F0900F00 00430A0E S_WAITCNT vmcnt(0) ; BF8C0770 S_OR_B64 exec, exec, s[20:21] ; 88FE147E V_MUL_F32_e32 v0, v11, v6 ; 10000D0B V_MUL_F32_e32 v1, v10, v7 ; 10020F0A V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_MUL_F32_e32 v1, v12, v5 ; 10020B0C V_MUL_F32_e32 v2, v13, v4 ; 1004090D V_CVT_PKRTZ_F16_F32_e32 v1, v1, v2 ; 5E020501 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_ADD_I32_e32 v0, s10, v0 ; 4A00000A S_LOAD_DWORDX4 s[4:7], s[8:9], 0x8 ; C0820908 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 V_MOV_B32_e32 v5, 0.000000e+00 ; 7E0A0280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x4 ; C0820904 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 V_FRACT_F32_e32 v5, v1 ; 7E0A4101 V_SUB_F32_e32 v1, v5, v1 ; 08020305 V_MUL_F32_e32 v1, -4.000000e+00, v1 ; 100202F7 V_CVT_I32_F32_e32 v1, v1 ; 7E021101 V_LSHLREV_B32_e32 v2, 4, v1 ; 34040284 S_LOAD_DWORDX4 s[0:3], s[2:3], 0x0 ; C0800300 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_DWORD v3, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000302 V_ADD_I32_e32 v4, 1, v1 ; 4A080281 V_LSHLREV_B32_e32 v4, 4, v4 ; 34080884 BUFFER_LOAD_DWORD v5, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000504 S_LOAD_DWORDX4 s[4:7], s[8:9], 0x0 ; C0820900 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 BUFFER_LOAD_FORMAT_XYZW v[6:9], s[4:7][v0] + 0x0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010600 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v0, v7, v5 ; 10000B07 V_MAD_F32 v0, v6, v3, v0, 0, 0 ; D2820000 04020706 V_ADD_I32_e32 v3, 2, v1 ; 4A060282 V_LSHLREV_B32_e32 v3, 4, v3 ; 34060684 BUFFER_LOAD_DWORD v5, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000503 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v8, v5, v0, 0, 0 ; D2820000 04020B08 V_ADD_I32_e32 v1, 3, v1 ; 4A020283 V_LSHLREV_B32_e32 v1, 4, v1 ; 34020284 BUFFER_LOAD_DWORD v5, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000501 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v0, v9, v5, v0, 0, 0 ; D2820000 04020B09 V_OR_B32_e32 v5, 12, v2 ; 380A048C BUFFER_LOAD_DWORD v5, s[0:3] + v5 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000505 V_OR_B32_e32 v10, 12, v4 ; 3814088C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v10, v7, v10 ; 10141507 V_MAD_F32 v5, v6, v5, v10, 0, 0 ; D2820005 042A0B06 V_OR_B32_e32 v10, 12, v3 ; 3814068C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v8, v10, v5, 0, 0 ; D2820005 04161508 V_OR_B32_e32 v10, 12, v1 ; 3814028C BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v9, v10, v5, 0, 0 ; D2820005 04161509 V_OR_B32_e32 v10, 8, v2 ; 38140488 BUFFER_LOAD_DWORD v10, s[0:3] + v10 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000A0A V_OR_B32_e32 v11, 8, v4 ; 38160888 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v11, v7, v11 ; 10161707 V_MAD_F32 v10, v6, v10, v11, 0, 0 ; D282000A 042E1506 V_OR_B32_e32 v11, 8, v3 ; 38160688 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v8, v11, v10, 0, 0 ; D282000A 042A1708 V_OR_B32_e32 v11, 8, v1 ; 38160288 BUFFER_LOAD_DWORD v11, s[0:3] + v11 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000B0B S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v10, v9, v11, v10, 0, 0 ; D282000A 042A1709 V_OR_B32_e32 v2, 4, v2 ; 38040484 BUFFER_LOAD_DWORD v2, s[0:3] + v2 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000202 V_OR_B32_e32 v4, 4, v4 ; 38080884 BUFFER_LOAD_DWORD v4, s[0:3] + v4 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000404 S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v4, v7, v4 ; 10080907 V_MAD_F32 v2, v6, v2, v4, 0, 0 ; D2820002 04120506 V_OR_B32_e32 v3, 4, v3 ; 38060684 BUFFER_LOAD_DWORD v3, s[0:3] + v3 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000303 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v8, v3, v2, 0, 0 ; D2820002 040A0708 V_OR_B32_e32 v1, 4, v1 ; 38020284 BUFFER_LOAD_DWORD v1, s[0:3] + v1 + 0 + 0x0, glc=0, slc=0, tfe=0 ; E0301000 80000101 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v1, v9, v1, v2, 0, 0 ; D2820001 040A0309 EXP 15, 12, 0, 1, 0, v0, v1, v10, v5 ; F80008CF 050A0100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyxx, IMM[0].xxxy 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 0.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 1.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 0.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_CVT_PKRTZ_F16_F32_e32 v0, 0.000000e+00, v2 ; 5E000480 V_CVT_PKRTZ_F16_F32_e64 v1, 0.000000e+00, 1.000000e+00, 0, 0 ; D25E0001 0001E480 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000